diff --git a/CHANGELOG.md b/CHANGELOG.md index 2e1f4de9f75..eee0980939b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -42,6 +42,7 @@ Docs: https://docs.openclaw.ai ### Fixes - Diagnostics: keep webhook/message OTEL attributes and Prometheus delivery labels low-cardinality and omit raw chat/message IDs from spans, so progress-draft and message-tool modes do not leak high-cardinality messaging identifiers. +- Google Meet: stop advertising legacy `mode: "realtime"` to agents and config UIs, while keeping it as a hidden compatibility alias for `mode: "agent"`, so new joins use the STT -> OpenClaw agent -> TTS path instead of selecting the direct realtime voice fallback. - Telegram: render shared interactive reply buttons in reply delivery so plugin approval messages show inline keyboards. (#76238) Thanks @keshavbotagent. - Agents/cli-runner: drop a saved `claude-cli` resume sessionId at preparation time when its on-disk transcript no longer exists in `~/.claude/projects/`, so a stale binding from a half-installed `update.run` cannot trap follow-up runs (auto-reply / Telegram direct) in a `claude --resume` timeout loop; the run starts fresh and the new sessionId is written back through the existing post-run flow. (#77030; refs #77011) Thanks @openperf. - Release validation: install the cross-OS TypeScript harness through Windows-safe Node/npm shims so native Windows package checks reach the OpenClaw smoke suites instead of exiting before artifact capture. Thanks @vincentkoc. diff --git a/docs/plugins/google-meet.md b/docs/plugins/google-meet.md index ebf6b6ddefc..6a28f474aa0 100644 --- a/docs/plugins/google-meet.md +++ b/docs/plugins/google-meet.md @@ -1,5 +1,5 @@ --- -summary: "Google Meet plugin: join explicit Meet URLs through Chrome or Twilio with realtime voice defaults" +summary: "Google Meet plugin: join explicit Meet URLs through Chrome or Twilio with agent talk-back defaults" read_when: - You want an OpenClaw agent to join a Google Meet call - You want an OpenClaw agent to create a new Google Meet call @@ -12,12 +12,12 @@ Google Meet participant support for OpenClaw — the plugin is explicit by desig - It only joins an explicit `https://meet.google.com/...` URL. - It can create a new Meet space through the Google Meet API, then join the returned URL. -- `realtime` voice is the default mode. -- Realtime voice can call back into the full OpenClaw agent when deeper - reasoning or tools are needed. -- Agents choose the join behavior with `mode`: use `realtime` for live - listen/talk-back, or `transcribe` to join/control the browser without the - realtime voice bridge. +- `agent` is the default talk-back mode: realtime transcription listens, the + configured OpenClaw agent answers, and regular OpenClaw TTS speaks into Meet. +- `bidi` remains available as the fallback direct realtime voice model mode. +- Agents choose the join behavior with `mode`: use `agent` for live + listen/talk-back, `bidi` for direct realtime voice fallback, or `transcribe` + to join/control the browser without the talk-back bridge. - Auth starts as personal Google OAuth or an already signed-in Chrome profile. - There is no automatic consent announcement. - The default Chrome audio backend is `BlackHole 2ch`. @@ -29,9 +29,10 @@ Google Meet participant support for OpenClaw — the plugin is explicit by desig ## Quick start -Install the local audio dependencies and configure a backend realtime voice -provider. OpenAI is the default; Google Gemini Live also works with -`realtime.provider: "google"`: +Install the local audio dependencies and configure a realtime transcription +provider plus regular OpenClaw TTS. OpenAI is the default transcription +provider; Google Gemini Live also works with `realtime.provider: "google"` for +`bidi` mode: ```bash brew install blackhole-2ch sox @@ -130,7 +131,7 @@ participation. Create a new meeting and join it: ```bash -openclaw googlemeet create --transport chrome-node --mode realtime +openclaw googlemeet create --transport chrome-node --mode agent ``` For API-created rooms, use Google Meet `SpaceConfig.accessType` when you want @@ -138,7 +139,7 @@ the room's no-knock policy to be explicit instead of inherited from the Google account defaults: ```bash -openclaw googlemeet create --access-type OPEN --transport chrome-node --mode realtime +openclaw googlemeet create --access-type OPEN --transport chrome-node --mode agent ``` `OPEN` lets anyone with the Meet URL join without knocking. `TRUSTED` lets the @@ -177,15 +178,15 @@ can explain which path was used. `create` joins the new meeting by default and returns `joined: true` plus the join session. To only mint the URL, use `create --no-join` on the CLI or pass `"join": false` to the tool. -Or tell an agent: "Create a Google Meet, join it with realtime voice, and send -me the link." The agent should call `google_meet` with `action: "create"` and -then share the returned `meetingUri`. +Or tell an agent: "Create a Google Meet, join it with the agent talk-back mode, +and send me the link." The agent should call `google_meet` with +`action: "create"` and then share the returned `meetingUri`. ```json { "action": "create", "transport": "chrome-node", - "mode": "realtime" + "mode": "agent" } ``` @@ -818,7 +819,7 @@ Agents can also create an API-backed room with an explicit access policy: { "action": "create", "transport": "chrome-node", - "mode": "realtime", + "mode": "agent", "accessType": "OPEN" } ``` @@ -1000,8 +1001,8 @@ Set the plugin config under `plugins.entries.google-meet.config`: Defaults: - `defaultTransport: "chrome"` -- `defaultMode: "agent"` (`"realtime"` is accepted as a compatibility alias for - `"agent"`) +- `defaultMode: "agent"` (`"realtime"` is accepted only as a legacy + compatibility alias for `"agent"`; new tool calls should say `"agent"`) - `chromeNode.node`: optional node id/name/IP for `chrome-node` - `chrome.audioBackend: "blackhole-2ch"` - `chrome.guestName: "OpenClaw Agent"`: name used on the signed-out Meet guest @@ -1011,7 +1012,7 @@ Defaults: - `chrome.reuseExistingTab: true`: activate an existing Meet tab instead of opening duplicates - `chrome.waitForInCallMs: 20000`: wait for the Meet tab to report in-call - before the realtime intro is triggered + before the talk-back intro is triggered - `chrome.audioFormat: "pcm16-24khz"`: command-pair audio format. Use `"g711-ulaw-8khz"` only for legacy/custom command pairs that still emit telephony audio. @@ -1140,8 +1141,8 @@ Gateway host, so model credentials stay there. With the default `mode: "agent"`, the realtime transcription provider handles listening, the configured OpenClaw agent produces the answer, and regular OpenClaw TTS speaks it into Meet. Use `mode: "bidi"` when you want the realtime voice model to answer directly. -`mode: "realtime"` remains accepted as a compatibility alias for -`mode: "agent"`. +Raw `mode: "realtime"` remains accepted as a legacy compatibility alias for +`mode: "agent"`, but it is no longer advertised in the agent tool schema. Use `action: "status"` to list active sessions or inspect a session ID. Use `action: "speak"` with `sessionId` and `message` to make the realtime agent diff --git a/extensions/google-meet/index.test.ts b/extensions/google-meet/index.test.ts index 1600fb49c53..e07cf525644 100644 --- a/extensions/google-meet/index.test.ts +++ b/extensions/google-meet/index.test.ts @@ -386,6 +386,7 @@ describe("google-meet plugin", () => { oauth: {}, auth: { provider: "google-oauth" }, }); + expect(resolveGoogleMeetConfig({ defaultMode: "realtime" }).defaultMode).toBe("agent"); expect(resolveGoogleMeetConfig({}).realtime.instructions).toContain("openclaw_agent_consult"); }); @@ -621,7 +622,7 @@ describe("google-meet plugin", () => { description: expect.stringContaining("recover_current_tab"), }, transport: { type: "string", enum: ["chrome", "chrome-node", "twilio"] }, - mode: { type: "string", enum: ["agent", "bidi", "realtime", "transcribe"] }, + mode: { type: "string", enum: ["agent", "bidi", "transcribe"] }, }, }); }); @@ -3154,7 +3155,12 @@ describe("google-meet plugin", () => { createdAt: "2026-04-27T00:00:00.000Z", updatedAt: "2026-04-27T00:00:00.000Z", participantIdentity: "signed-in Google Chrome profile", - realtime: { enabled: true, provider: "openai", toolPolicy: "safe-read-only" }, + realtime: { + enabled: true, + strategy: "agent", + transcriptionProvider: "openai", + toolPolicy: "safe-read-only", + }, chrome: { audioBackend: "blackhole-2ch", launched: true, diff --git a/extensions/google-meet/index.ts b/extensions/google-meet/index.ts index c9c009372cf..dc81c56e6bb 100644 --- a/extensions/google-meet/index.ts +++ b/extensions/google-meet/index.ts @@ -155,10 +155,14 @@ const googleMeetConfigSchema = { help: "Legacy realtime alias setting. Use mode=agent or mode=bidi for new Meet joins.", }, "realtime.provider": { - label: "Realtime Provider", - help: "Defaults to OpenAI; uses OPENAI_API_KEY when no provider config is set.", + label: "Speech Provider", + help: "Agent mode uses this for realtime transcription. Bidi mode uses it as the realtime voice provider.", + }, + "realtime.model": { + label: "Bidi Realtime Model", + help: "Only used by mode=bidi. Agent mode answers with the configured OpenClaw agent and regular TTS.", + advanced: true, }, - "realtime.model": { label: "Realtime Model", advanced: true }, "realtime.instructions": { label: "Realtime Instructions", advanced: true }, "realtime.introMessage": { label: "Realtime Intro Message", @@ -238,9 +242,9 @@ const GoogleMeetToolSchema = Type.Object({ ), mode: Type.Optional( Type.String({ - enum: ["agent", "bidi", "realtime", "transcribe"], + enum: ["agent", "bidi", "transcribe"], description: - "Join mode. agent uses realtime transcription, the configured OpenClaw agent, and regular TTS. bidi uses the realtime voice model directly. realtime is a compatibility alias for agent. transcribe joins observe-only.", + "Join mode. agent uses realtime transcription, the configured OpenClaw agent, and regular TTS. bidi uses the realtime voice model directly. transcribe joins observe-only.", }), ), dialInNumber: Type.Optional( diff --git a/extensions/google-meet/openclaw.plugin.json b/extensions/google-meet/openclaw.plugin.json index e0db9c5d2ea..5ee9b6e2378 100644 --- a/extensions/google-meet/openclaw.plugin.json +++ b/extensions/google-meet/openclaw.plugin.json @@ -148,11 +148,12 @@ "help": "Legacy realtime alias setting. Use mode=agent or mode=bidi for new Meet joins." }, "realtime.provider": { - "label": "Realtime Provider", - "help": "Defaults to OpenAI; uses OPENAI_API_KEY when no provider config is set." + "label": "Speech Provider", + "help": "Agent mode uses this for realtime transcription. Bidi mode uses it as the realtime voice provider." }, "realtime.model": { - "label": "Realtime Model", + "label": "Bidi Realtime Model", + "help": "Only used by mode=bidi. Agent mode answers with the configured OpenClaw agent and regular TTS.", "advanced": true }, "realtime.instructions": { @@ -227,7 +228,7 @@ }, "defaultMode": { "type": "string", - "enum": ["agent", "bidi", "realtime", "transcribe"], + "enum": ["agent", "bidi", "transcribe"], "default": "agent" }, "chrome": { diff --git a/extensions/google-meet/src/cli.ts b/extensions/google-meet/src/cli.ts index b688f241346..f115173f87d 100644 --- a/extensions/google-meet/src/cli.ts +++ b/extensions/google-meet/src/cli.ts @@ -11,7 +11,7 @@ import { listGoogleMeetCalendarEvents, type GoogleMeetCalendarLookupResult, } from "./calendar.js"; -import type { GoogleMeetConfig, GoogleMeetMode, GoogleMeetTransport } from "./config.js"; +import type { GoogleMeetConfig, GoogleMeetModeInput, GoogleMeetTransport } from "./config.js"; import { hasCreateSpaceConfigInput, resolveCreateSpaceConfig } from "./create.js"; import { buildGoogleMeetPreflightReport, @@ -37,7 +37,7 @@ import type { GoogleMeetRuntime } from "./runtime.js"; type JoinOptions = { transport?: GoogleMeetTransport; - mode?: GoogleMeetMode; + mode?: GoogleMeetModeInput; message?: string; timeoutMs?: string; dialInNumber?: string; @@ -134,7 +134,7 @@ export type GoogleMeetExportManifest = { type SetupOptions = { json?: boolean; - mode?: GoogleMeetMode; + mode?: GoogleMeetModeInput; transport?: GoogleMeetTransport; }; @@ -181,7 +181,7 @@ type CreateOptions = { entryPointAccess?: string; join?: boolean; transport?: GoogleMeetTransport; - mode?: GoogleMeetMode; + mode?: GoogleMeetModeInput; message?: string; dialInNumber?: string; pin?: string; @@ -349,12 +349,17 @@ function writeDoctorStatus(status: Awaited", "Transport to check: chrome, chrome-node, or twilio") - .option("--mode ", "Mode to check: realtime or transcribe") + .option("--mode ", "Mode to check: agent, bidi, or transcribe") .option("--json", "Print JSON output", false) .action(async (options: SetupOptions) => { const rt = await params.ensureRuntime(); diff --git a/extensions/google-meet/src/runtime.ts b/extensions/google-meet/src/runtime.ts index 003afc8b5e0..a4588e55f15 100644 --- a/extensions/google-meet/src/runtime.ts +++ b/extensions/google-meet/src/runtime.ts @@ -3,7 +3,12 @@ import type { OpenClawConfig } from "openclaw/plugin-sdk/config-types"; import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime"; import type { PluginRuntime, RuntimeLogger } from "openclaw/plugin-sdk/plugin-runtime"; import { normalizeOptionalString } from "openclaw/plugin-sdk/text-runtime"; -import type { GoogleMeetConfig, GoogleMeetMode, GoogleMeetTransport } from "./config.js"; +import type { + GoogleMeetConfig, + GoogleMeetMode, + GoogleMeetModeInput, + GoogleMeetTransport, +} from "./config.js"; import { addGoogleMeetSetupCheck, getGoogleMeetSetupStatus } from "./setup.js"; import { isSameMeetUrlForReuse, resolveChromeNodeInfo } from "./transports/chrome-browser-proxy.js"; import { createMeetWithBrowserProxyOnNode } from "./transports/chrome-create.js"; @@ -60,8 +65,8 @@ function resolveTransport(input: GoogleMeetTransport | undefined, config: Google return input ?? config.defaultTransport; } -function resolveMode(input: GoogleMeetMode | undefined, config: GoogleMeetConfig) { - return input ?? config.defaultMode; +function resolveMode(input: GoogleMeetModeInput | undefined, config: GoogleMeetConfig) { + return input === "realtime" ? "agent" : (input ?? config.defaultMode); } function isGoogleMeetTalkBackMode(mode: GoogleMeetMode): boolean { @@ -245,7 +250,7 @@ export class GoogleMeetRuntime { async setupStatus( options: { transport?: GoogleMeetTransport; - mode?: GoogleMeetMode; + mode?: GoogleMeetModeInput; dialInNumber?: string; } = {}, ) { @@ -397,8 +402,9 @@ export class GoogleMeetRuntime { realtime: { enabled: isGoogleMeetTalkBackMode(mode), strategy: mode === "bidi" ? "bidi" : "agent", - provider: this.params.config.realtime.provider, - model: this.params.config.realtime.model, + provider: mode === "bidi" ? this.params.config.realtime.provider : undefined, + model: mode === "bidi" ? this.params.config.realtime.model : undefined, + transcriptionProvider: mode === "agent" ? this.params.config.realtime.provider : undefined, toolPolicy: this.params.config.realtime.toolPolicy, }, notes: [], @@ -690,7 +696,8 @@ export class GoogleMeetRuntime { recentTranscript?: GoogleMeetChromeHealth["recentTranscript"]; session: GoogleMeetSession; }> { - if (request.mode && isGoogleMeetTalkBackMode(request.mode)) { + const requestedMode = request.mode ? resolveMode(request.mode, this.params.config) : undefined; + if (requestedMode && isGoogleMeetTalkBackMode(requestedMode)) { throw new Error( "test_listen requires mode: transcribe; use test_speech for talk-back sessions.", ); diff --git a/extensions/google-meet/src/transports/types.ts b/extensions/google-meet/src/transports/types.ts index 54c7d926192..b3249221454 100644 --- a/extensions/google-meet/src/transports/types.ts +++ b/extensions/google-meet/src/transports/types.ts @@ -1,11 +1,11 @@ -import type { GoogleMeetMode, GoogleMeetTransport } from "../config.js"; +import type { GoogleMeetMode, GoogleMeetModeInput, GoogleMeetTransport } from "../config.js"; type GoogleMeetSessionState = "active" | "ended"; export type GoogleMeetJoinRequest = { url: string; transport?: GoogleMeetTransport; - mode?: GoogleMeetMode; + mode?: GoogleMeetModeInput; message?: string; timeoutMs?: number; dialInNumber?: string; @@ -106,6 +106,7 @@ export type GoogleMeetSession = { strategy?: string; provider?: string; model?: string; + transcriptionProvider?: string; toolPolicy: string; }; chrome?: {