diff --git a/CHANGELOG.md b/CHANGELOG.md index 2e0b4f1a221..bc747ddfac2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -58,6 +58,7 @@ Docs: https://docs.openclaw.ai - Release/beta smoke: resolve the dispatched Telegram beta E2E run from `gh run list` when `gh workflow run` returns no run URL, so the maintainer helper does not fail immediately after dispatch. Thanks @vincentkoc. - Media/images: keep HEIC/HEIF attachments fail-closed when optional Sharp conversion is unavailable instead of sending originals that still need conversion. Thanks @vincentkoc. +- Google Meet: fork the caller's current agent transcript into agent-mode meeting consultant sessions, so Meet replies inherit the context from the tool call that joined the meeting. - Telegram/streaming: sanitize tool-progress draft preview backticks before shared compaction, so long backtick-heavy progress text still renders inside the safe code-formatted preview instead of collapsing to an ellipsis. - UI/chat: remove the unsupported `line-clamp` declaration from the chat queue text rule to eliminate Firefox console noise without changing visible truncation behavior. Thanks @ZanderH-code. - Agents/Pi: suppress persistence for synthetic mid-turn overflow continuation prompts, so transcript-retry recovery does not write the "continue from transcript" prompt as a new user turn. Thanks @vincentkoc. diff --git a/docs/plugins/google-meet.md b/docs/plugins/google-meet.md index 9e5cd6d1351..162e26e14b7 100644 --- a/docs/plugins/google-meet.md +++ b/docs/plugins/google-meet.md @@ -1707,6 +1707,11 @@ Chrome talk-back modes need `BlackHole 2ch` plus either: audio path and must exit after starting or validating its daemon. This is only valid for `bidi` because `agent` mode needs direct command-pair access for TTS. +When an agent calls the `google_meet` tool in agent mode, the meeting consultant +session forks the caller's current transcript before answering participant +speech. 
The Meet session still stays separate (`agent:<agentId>:subagent:google-meet:<meetingSessionId>`) +so meeting follow-ups do not mutate the caller transcript directly. + For clean duplex audio, route Meet output and Meet microphone through separate virtual devices or a Loopback-style virtual device graph. A single shared BlackHole device can echo other participants back into the call. diff --git a/extensions/google-meet/index.test.ts b/extensions/google-meet/index.test.ts index ddc876e2245..966b7aa7aaf 100644 --- a/extensions/google-meet/index.test.ts +++ b/extensions/google-meet/index.test.ts @@ -1259,6 +1259,32 @@ describe("google-meet plugin", () => { }); }); + it("passes the caller session key through tool joins for agent context forking", async () => { + const { tools } = setup( + {}, + { toolContext: { sessionKey: "agent:main:discord:channel:general" } }, + ); + const gatewayParams: unknown[] = []; + googleMeetPluginTesting.setCallGatewayFromCliForTests(async (_method, _opts, params) => { + gatewayParams.push(params); + return { ok: true }; + }); + const tool = tools[0] as { + execute: (id: string, params: unknown) => Promise; + }; + + await tool.execute("id", { + action: "join", + url: "https://meet.google.com/abc-defg-hij", + requesterSessionKey: "agent:main:wrong", + }); + + expect(gatewayParams[0]).toMatchObject({ + url: "https://meet.google.com/abc-defg-hij", + requesterSessionKey: "agent:main:discord:channel:general", + }); + }); + it("explains that Twilio joins need dial-in details", async () => { const { tools } = setup({ defaultTransport: "twilio" }); const tool = tools[0] as { diff --git a/extensions/google-meet/index.ts b/extensions/google-meet/index.ts index 6da6ee71b43..98fcec84412 100644 --- a/extensions/google-meet/index.ts +++ b/extensions/google-meet/index.ts @@ -741,6 +741,7 @@ export default definePluginEntry({ pin: normalizeOptionalString(params?.pin), dtmfSequence: normalizeOptionalString(params?.dtmfSequence), message: normalizeOptionalString(params?.message), + 
requesterSessionKey: normalizeOptionalString(params?.requesterSessionKey), }); respond(true, result); } catch (err) { @@ -992,6 +993,7 @@ export default definePluginEntry({ pin: normalizeOptionalString(params?.pin), dtmfSequence: normalizeOptionalString(params?.dtmfSequence), message: normalizeOptionalString(params?.message), + requesterSessionKey: normalizeOptionalString(params?.requesterSessionKey), }); respond(true, result); } catch (err) { @@ -1018,155 +1020,176 @@ export default definePluginEntry({ }, ); - api.registerTool({ - name: "google_meet", - label: "Google Meet", - description: - "Join and track Google Meet sessions through Chrome or Twilio. Call setup_status before join/create/test_listen/test_speech; if it reports a Chrome node offline, local audio missing, or missing Twilio dial plan, surface that blocker instead of retrying or switching transports. Twilio cannot dial a Meet URL directly: provide dialInNumber plus optional pin/dtmfSequence, or configure twilio.defaultDialInNumber. Offline nodes are diagnostics only, not usable candidates. If local Chrome talk-back audio is unsupported on this OS, use mode=transcribe, transport=twilio, or a macOS chrome-node for agent/bidi Chrome. 
If a Meet tab is already open after a timeout, call recover_current_tab before retrying join to report login, permission, or admission blockers without opening another tab.", - parameters: GoogleMeetToolSchema, - async execute(_toolCallId, params) { - const raw = asParamRecord(params); - try { - assertGoogleMeetAgentToolActionSupported({ config, raw }); - switch (raw.action) { - case "join": { - return json(await callGoogleMeetGatewayFromTool({ config, action: "join", raw })); - } - case "create": { - return json(await callGoogleMeetGatewayFromTool({ config, action: "create", raw })); - } - case "test_speech": { - return json( - await callGoogleMeetGatewayFromTool({ config, action: "test_speech", raw }), - ); - } - case "test_listen": { - return json( - await callGoogleMeetGatewayFromTool({ config, action: "test_listen", raw }), - ); - } - case "status": { - return json(await callGoogleMeetGatewayFromTool({ config, action: "status", raw })); - } - case "recover_current_tab": { - return json( - await callGoogleMeetGatewayFromTool({ - config, - action: "recover_current_tab", - raw, - }), - ); - } - case "setup_status": { - return json( - await callGoogleMeetGatewayFromTool({ config, action: "setup_status", raw }), - ); - } - case "resolve_space": { - const { token: _token, ...result } = await resolveSpaceFromParams(config, raw); - return json(result); - } - case "preflight": { - const { meeting, token, space } = await resolveSpaceFromParams(config, raw); - return json( - buildGoogleMeetPreflightReport({ - input: meeting, - space, - previewAcknowledged: config.preview.enrollmentAcknowledged, - tokenSource: token.refreshed ? 
"refresh-token" : "cached-access-token", - }), - ); - } - case "latest": { - const token = await resolveGoogleMeetTokenFromParams(config, raw); - const resolved = await resolveMeetingFromParams({ - config, - raw, - accessToken: token.accessToken, - }); - return json({ - ...(await fetchLatestGoogleMeetConferenceRecord({ - accessToken: token.accessToken, - meeting: resolved.meeting, - })), - ...(resolved.calendarEvent ? { calendarEvent: resolved.calendarEvent } : {}), - }); - } - case "calendar_events": { - const token = await resolveGoogleMeetTokenFromParams(config, raw); - const window = raw.today === true ? buildGoogleMeetCalendarDayWindow() : {}; - return json( - await listGoogleMeetCalendarEvents({ - accessToken: token.accessToken, - calendarId: normalizeOptionalString(raw.calendarId), - eventQuery: normalizeOptionalString(raw.event), - ...window, - }), - ); - } - case "artifacts": { - const resolved = await resolveArtifactQueryFromParams(config, raw); - return json( - await fetchGoogleMeetArtifacts({ - accessToken: resolved.token.accessToken, - meeting: resolved.meeting, - conferenceRecord: resolved.conferenceRecord, - pageSize: resolved.pageSize, - includeTranscriptEntries: resolved.includeTranscriptEntries, - includeDocumentBodies: resolved.includeDocumentBodies, - allConferenceRecords: resolved.allConferenceRecords, - }), - ); - } - case "attendance": { - const resolved = await resolveArtifactQueryFromParams(config, raw); - return json( - await fetchGoogleMeetAttendance({ - accessToken: resolved.token.accessToken, - meeting: resolved.meeting, - conferenceRecord: resolved.conferenceRecord, - pageSize: resolved.pageSize, - allConferenceRecords: resolved.allConferenceRecords, - mergeDuplicateParticipants: resolved.mergeDuplicateParticipants, - lateAfterMinutes: resolved.lateAfterMinutes, - earlyBeforeMinutes: resolved.earlyBeforeMinutes, - }), - ); - } - case "export": { - return json(await exportGoogleMeetBundleFromParams(config, raw)); - } - case "leave": { - 
const sessionId = normalizeOptionalString(raw.sessionId); - if (!sessionId) { - throw new Error("sessionId required"); + api.registerTool( + (toolContext) => ({ + name: "google_meet", + label: "Google Meet", + description: + "Join and track Google Meet sessions through Chrome or Twilio. Call setup_status before join/create/test_listen/test_speech; if it reports a Chrome node offline, local audio missing, or missing Twilio dial plan, surface that blocker instead of retrying or switching transports. Twilio cannot dial a Meet URL directly: provide dialInNumber plus optional pin/dtmfSequence, or configure twilio.defaultDialInNumber. Offline nodes are diagnostics only, not usable candidates. If local Chrome talk-back audio is unsupported on this OS, use mode=transcribe, transport=twilio, or a macOS chrome-node for agent/bidi Chrome. If a Meet tab is already open after a timeout, call recover_current_tab before retrying join to report login, permission, or admission blockers without opening another tab.", + parameters: GoogleMeetToolSchema, + async execute(_toolCallId, params) { + const raw = asParamRecord(params); + const requesterSessionKey = normalizeOptionalString(toolContext.sessionKey); + const rawWithRequester = requesterSessionKey ? 
{ ...raw, requesterSessionKey } : raw; + try { + assertGoogleMeetAgentToolActionSupported({ config, raw }); + switch (raw.action) { + case "join": { + return json( + await callGoogleMeetGatewayFromTool({ + config, + action: "join", + raw: rawWithRequester, + }), + ); } - return json(await callGoogleMeetGatewayFromTool({ config, action: "leave", raw })); - } - case "end_active_conference": { - return json( - await callGoogleMeetGatewayFromTool({ - config, - action: "end_active_conference", - raw, - }), - ); - } - case "speak": { - const sessionId = normalizeOptionalString(raw.sessionId); - if (!sessionId) { - throw new Error("sessionId required"); + case "create": { + return json( + await callGoogleMeetGatewayFromTool({ + config, + action: "create", + raw: rawWithRequester, + }), + ); } - return json(await callGoogleMeetGatewayFromTool({ config, action: "speak", raw })); + case "test_speech": { + return json( + await callGoogleMeetGatewayFromTool({ + config, + action: "test_speech", + raw: rawWithRequester, + }), + ); + } + case "test_listen": { + return json( + await callGoogleMeetGatewayFromTool({ config, action: "test_listen", raw }), + ); + } + case "status": { + return json(await callGoogleMeetGatewayFromTool({ config, action: "status", raw })); + } + case "recover_current_tab": { + return json( + await callGoogleMeetGatewayFromTool({ + config, + action: "recover_current_tab", + raw, + }), + ); + } + case "setup_status": { + return json( + await callGoogleMeetGatewayFromTool({ config, action: "setup_status", raw }), + ); + } + case "resolve_space": { + const { token: _token, ...result } = await resolveSpaceFromParams(config, raw); + return json(result); + } + case "preflight": { + const { meeting, token, space } = await resolveSpaceFromParams(config, raw); + return json( + buildGoogleMeetPreflightReport({ + input: meeting, + space, + previewAcknowledged: config.preview.enrollmentAcknowledged, + tokenSource: token.refreshed ? 
"refresh-token" : "cached-access-token", + }), + ); + } + case "latest": { + const token = await resolveGoogleMeetTokenFromParams(config, raw); + const resolved = await resolveMeetingFromParams({ + config, + raw, + accessToken: token.accessToken, + }); + return json({ + ...(await fetchLatestGoogleMeetConferenceRecord({ + accessToken: token.accessToken, + meeting: resolved.meeting, + })), + ...(resolved.calendarEvent ? { calendarEvent: resolved.calendarEvent } : {}), + }); + } + case "calendar_events": { + const token = await resolveGoogleMeetTokenFromParams(config, raw); + const window = raw.today === true ? buildGoogleMeetCalendarDayWindow() : {}; + return json( + await listGoogleMeetCalendarEvents({ + accessToken: token.accessToken, + calendarId: normalizeOptionalString(raw.calendarId), + eventQuery: normalizeOptionalString(raw.event), + ...window, + }), + ); + } + case "artifacts": { + const resolved = await resolveArtifactQueryFromParams(config, raw); + return json( + await fetchGoogleMeetArtifacts({ + accessToken: resolved.token.accessToken, + meeting: resolved.meeting, + conferenceRecord: resolved.conferenceRecord, + pageSize: resolved.pageSize, + includeTranscriptEntries: resolved.includeTranscriptEntries, + includeDocumentBodies: resolved.includeDocumentBodies, + allConferenceRecords: resolved.allConferenceRecords, + }), + ); + } + case "attendance": { + const resolved = await resolveArtifactQueryFromParams(config, raw); + return json( + await fetchGoogleMeetAttendance({ + accessToken: resolved.token.accessToken, + meeting: resolved.meeting, + conferenceRecord: resolved.conferenceRecord, + pageSize: resolved.pageSize, + allConferenceRecords: resolved.allConferenceRecords, + mergeDuplicateParticipants: resolved.mergeDuplicateParticipants, + lateAfterMinutes: resolved.lateAfterMinutes, + earlyBeforeMinutes: resolved.earlyBeforeMinutes, + }), + ); + } + case "export": { + return json(await exportGoogleMeetBundleFromParams(config, raw)); + } + case "leave": { + 
const sessionId = normalizeOptionalString(raw.sessionId); + if (!sessionId) { + throw new Error("sessionId required"); + } + return json(await callGoogleMeetGatewayFromTool({ config, action: "leave", raw })); + } + case "end_active_conference": { + return json( + await callGoogleMeetGatewayFromTool({ + config, + action: "end_active_conference", + raw, + }), + ); + } + case "speak": { + const sessionId = normalizeOptionalString(raw.sessionId); + if (!sessionId) { + throw new Error("sessionId required"); + } + return json(await callGoogleMeetGatewayFromTool({ config, action: "speak", raw })); + } + default: + throw new Error("unknown google_meet action"); } - default: - throw new Error("unknown google_meet action"); + } catch (err) { + return json(formatGatewayError(err)); } - } catch (err) { - return json(formatGatewayError(err)); - } - }, - }); + }, + }), + { name: "google_meet" }, + ); api.registerNodeHostCommand({ command: "googlemeet.chrome", diff --git a/extensions/google-meet/src/agent-consult.ts b/extensions/google-meet/src/agent-consult.ts index 3727dc84dc4..31f8c4af7dd 100644 --- a/extensions/google-meet/src/agent-consult.ts +++ b/extensions/google-meet/src/agent-consult.ts @@ -10,6 +10,7 @@ import { type RealtimeVoiceTool, } from "openclaw/plugin-sdk/realtime-voice"; import { normalizeAgentId } from "openclaw/plugin-sdk/routing"; +import { normalizeOptionalString } from "openclaw/plugin-sdk/text-runtime"; import type { GoogleMeetConfig, GoogleMeetToolPolicy } from "./config.js"; export const GOOGLE_MEET_AGENT_CONSULT_TOOL_NAME = REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME; @@ -44,11 +45,13 @@ export async function consultOpenClawAgentForGoogleMeet(params: { runtime: PluginRuntime; logger: RuntimeLogger; meetingSessionId: string; + requesterSessionKey?: string; args: unknown; transcript: Array<{ role: "user" | "assistant"; text: string }>; }): Promise<{ text: string }> { const agentId = normalizeAgentId(params.config.realtime.agentId); - const 
requesterSessionKey = `agent:${agentId}:main`; + const requesterSessionKey = + normalizeOptionalString(params.requesterSessionKey) ?? `agent:${agentId}:main`; const sessionKey = `agent:${agentId}:subagent:google-meet:${params.meetingSessionId}`; return await consultRealtimeVoiceAgent({ cfg: params.fullConfig, @@ -60,6 +63,7 @@ export async function consultOpenClawAgentForGoogleMeet(params: { lane: "google-meet", runIdPrefix: `google-meet:${params.meetingSessionId}`, spawnedBy: requesterSessionKey, + contextMode: "fork", args: params.args, transcript: params.transcript, surface: "a private Google Meet", diff --git a/extensions/google-meet/src/create.ts b/extensions/google-meet/src/create.ts index a557ef275c1..10b720845fe 100644 --- a/extensions/google-meet/src/create.ts +++ b/extensions/google-meet/src/create.ts @@ -146,6 +146,7 @@ export async function createAndJoinMeetFromParams(params: { pin: normalizeOptionalString(params.raw.pin), dtmfSequence: normalizeOptionalString(params.raw.dtmfSequence), message: normalizeOptionalString(params.raw.message), + requesterSessionKey: normalizeOptionalString(params.raw.requesterSessionKey), }); return { ...created, diff --git a/extensions/google-meet/src/realtime-node.ts b/extensions/google-meet/src/realtime-node.ts index fac11676efa..64f9e28c199 100644 --- a/extensions/google-meet/src/realtime-node.ts +++ b/extensions/google-meet/src/realtime-node.ts @@ -76,6 +76,7 @@ export async function startNodeAgentAudioBridge(params: { fullConfig: OpenClawConfig; runtime: PluginRuntime; meetingSessionId: string; + requesterSessionKey?: string; nodeId: string; bridgeId: string; logger: RuntimeLogger; @@ -225,6 +226,7 @@ export async function startNodeAgentAudioBridge(params: { runtime: params.runtime, logger: params.logger, meetingSessionId: params.meetingSessionId, + requesterSessionKey: params.requesterSessionKey, args: { question: currentQuestion, responseStyle: "Brief, natural spoken answer for a live meeting.", @@ -373,6 +375,7 @@ 
export async function startNodeRealtimeAudioBridge(params: { fullConfig: OpenClawConfig; runtime: PluginRuntime; meetingSessionId: string; + requesterSessionKey?: string; nodeId: string; bridgeId: string; logger: RuntimeLogger; @@ -457,6 +460,7 @@ export async function startNodeRealtimeAudioBridge(params: { runtime: params.runtime, logger: params.logger, meetingSessionId: params.meetingSessionId, + requesterSessionKey: params.requesterSessionKey, args: { question: currentQuestion, responseStyle: "Brief, natural spoken answer for a live meeting.", @@ -634,6 +638,7 @@ export async function startNodeRealtimeAudioBridge(params: { runtime: params.runtime, logger: params.logger, meetingSessionId: params.meetingSessionId, + requesterSessionKey: params.requesterSessionKey, args: event.args, transcript, }) diff --git a/extensions/google-meet/src/realtime.ts b/extensions/google-meet/src/realtime.ts index e112308ce06..929a2eeacd4 100644 --- a/extensions/google-meet/src/realtime.ts +++ b/extensions/google-meet/src/realtime.ts @@ -513,6 +513,7 @@ export async function startCommandAgentAudioBridge(params: { fullConfig: OpenClawConfig; runtime: PluginRuntime; meetingSessionId: string; + requesterSessionKey?: string; inputCommand: string[]; outputCommand: string[]; logger: RuntimeLogger; @@ -711,6 +712,7 @@ export async function startCommandAgentAudioBridge(params: { runtime: params.runtime, logger: params.logger, meetingSessionId: params.meetingSessionId, + requesterSessionKey: params.requesterSessionKey, args: { question: currentQuestion, responseStyle: "Brief, natural spoken answer for a live meeting.", @@ -822,6 +824,7 @@ export async function startCommandRealtimeAudioBridge(params: { fullConfig: OpenClawConfig; runtime: PluginRuntime; meetingSessionId: string; + requesterSessionKey?: string; inputCommand: string[]; outputCommand: string[]; logger: RuntimeLogger; @@ -1108,6 +1111,7 @@ export async function startCommandRealtimeAudioBridge(params: { runtime: params.runtime, 
logger: params.logger, meetingSessionId: params.meetingSessionId, + requesterSessionKey: params.requesterSessionKey, args: { question: currentQuestion, responseStyle: "Brief, natural spoken answer for a live meeting.", @@ -1208,6 +1212,7 @@ export async function startCommandRealtimeAudioBridge(params: { runtime: params.runtime, logger: params.logger, meetingSessionId: params.meetingSessionId, + requesterSessionKey: params.requesterSessionKey, args: event.args, transcript, }) diff --git a/extensions/google-meet/src/runtime.ts b/extensions/google-meet/src/runtime.ts index f985f3e7198..1bfb90fa1c1 100644 --- a/extensions/google-meet/src/runtime.ts +++ b/extensions/google-meet/src/runtime.ts @@ -426,6 +426,7 @@ export class GoogleMeetRuntime { config: this.params.config, fullConfig: this.params.fullConfig, meetingSessionId: session.id, + requesterSessionKey: request.requesterSessionKey, mode, url, logger: this.params.logger, @@ -435,6 +436,7 @@ export class GoogleMeetRuntime { config: this.params.config, fullConfig: this.params.fullConfig, meetingSessionId: session.id, + requesterSessionKey: request.requesterSessionKey, mode, url, logger: this.params.logger, diff --git a/extensions/google-meet/src/test-support/plugin-harness.ts b/extensions/google-meet/src/test-support/plugin-harness.ts index 2e1220301a4..6ab045b0cce 100644 --- a/extensions/google-meet/src/test-support/plugin-harness.ts +++ b/extensions/google-meet/src/test-support/plugin-harness.ts @@ -61,6 +61,7 @@ export function setupGoogleMeetPlugin( options?: { timeoutMs?: number }, ) => Promise; registerPlatform?: NodeJS.Platform; + toolContext?: Record; } = {}, ) { const methods = new Map(); @@ -154,7 +155,13 @@ export function setupGoogleMeetPlugin( } as unknown as OpenClawPluginApi["runtime"], logger: noopLogger, registerGatewayMethod: (method: string, handler: unknown) => methods.set(method, handler), - registerTool: (tool: unknown) => tools.push(tool), + registerTool: (tool: unknown) => { + tools.push( + 
typeof tool === "function" + ? (tool as (ctx: Record) => unknown)(options.toolContext ?? {}) + : tool, + ); + }, registerCli: (_registrar: unknown, opts: unknown) => cliRegistrations.push(opts), registerNodeHostCommand: (command: unknown) => nodeHostCommands.push(command), }); diff --git a/extensions/google-meet/src/transports/chrome.ts b/extensions/google-meet/src/transports/chrome.ts index d002265aeb1..5d47fd29dbf 100644 --- a/extensions/google-meet/src/transports/chrome.ts +++ b/extensions/google-meet/src/transports/chrome.ts @@ -92,6 +92,7 @@ export async function launchChromeMeet(params: { config: GoogleMeetConfig; fullConfig: OpenClawConfig; meetingSessionId: string; + requesterSessionKey?: string; mode: GoogleMeetMode; url: string; logger: RuntimeLogger; @@ -162,6 +163,7 @@ export async function launchChromeMeet(params: { fullConfig: params.fullConfig, runtime: params.runtime, meetingSessionId: params.meetingSessionId, + requesterSessionKey: params.requesterSessionKey, inputCommand: params.config.chrome.audioInputCommand, outputCommand: params.config.chrome.audioOutputCommand, logger: params.logger, @@ -174,6 +176,7 @@ export async function launchChromeMeet(params: { fullConfig: params.fullConfig, runtime: params.runtime, meetingSessionId: params.meetingSessionId, + requesterSessionKey: params.requesterSessionKey, inputCommand: params.config.chrome.audioInputCommand, outputCommand: params.config.chrome.audioOutputCommand, logger: params.logger, @@ -950,6 +953,7 @@ export async function launchChromeMeetOnNode(params: { config: GoogleMeetConfig; fullConfig: OpenClawConfig; meetingSessionId: string; + requesterSessionKey?: string; mode: GoogleMeetMode; url: string; logger: RuntimeLogger; @@ -1025,6 +1029,7 @@ export async function launchChromeMeetOnNode(params: { fullConfig: params.fullConfig, runtime: params.runtime, meetingSessionId: params.meetingSessionId, + requesterSessionKey: params.requesterSessionKey, nodeId, bridgeId: result.bridgeId, logger: 
params.logger, diff --git a/extensions/google-meet/src/transports/types.ts b/extensions/google-meet/src/transports/types.ts index b3249221454..cac0298829c 100644 --- a/extensions/google-meet/src/transports/types.ts +++ b/extensions/google-meet/src/transports/types.ts @@ -7,6 +7,7 @@ export type GoogleMeetJoinRequest = { transport?: GoogleMeetTransport; mode?: GoogleMeetModeInput; message?: string; + requesterSessionKey?: string; timeoutMs?: number; dialInNumber?: string; pin?: string; diff --git a/src/realtime-voice/agent-consult-runtime.test.ts b/src/realtime-voice/agent-consult-runtime.test.ts index 568970e38ab..fbd6db234df 100644 --- a/src/realtime-voice/agent-consult-runtime.test.ts +++ b/src/realtime-voice/agent-consult-runtime.test.ts @@ -1,5 +1,6 @@ -import { describe, expect, it, vi } from "vitest"; +import { afterEach, describe, expect, it, vi } from "vitest"; import { + __setRealtimeVoiceAgentConsultDepsForTest, consultRealtimeVoiceAgent, resolveRealtimeVoiceAgentConsultTools, resolveRealtimeVoiceAgentConsultToolsAllow, @@ -7,7 +8,17 @@ import { import { REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME } from "./agent-consult-tool.js"; function createAgentRuntime(payloads: unknown[] = [{ text: "Speak this." }]) { - const sessionStore: Record = {}; + const sessionStore: Record< + string, + { + sessionId?: string; + updatedAt?: number; + sessionFile?: string; + spawnedBy?: string; + forkedFromParent?: boolean; + totalTokens?: number; + } + > = {}; const runEmbeddedPiAgent = vi.fn(async () => ({ payloads, meta: {}, @@ -31,7 +42,10 @@ function createAgentRuntime(payloads: unknown[] = [{ text: "Speak this." }]) { loadSessionStore: vi.fn(() => sessionStore), saveSessionStore: vi.fn(async () => {}), updateSessionStore, - resolveSessionFilePath: vi.fn(() => "/tmp/session.json"), + resolveSessionFilePath: vi.fn( + (_sessionId: string, entry?: { sessionFile?: string }) => + entry?.sessionFile ?? 
"/tmp/session.json", + ), }, runEmbeddedPiAgent, }, @@ -41,6 +55,10 @@ function createAgentRuntime(payloads: unknown[] = [{ text: "Speak this." }]) { } describe("realtime voice agent consult runtime", () => { + afterEach(() => { + __setRealtimeVoiceAgentConsultDepsForTest(null); + }); + it("exposes the shared consult tool based on policy", () => { expect(resolveRealtimeVoiceAgentConsultTools("safe-read-only")).toEqual([ expect.objectContaining({ name: REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME }), @@ -151,4 +169,67 @@ describe("realtime voice agent consult runtime", () => { "[realtime-voice] agent consult produced no answer: agent returned no speakable text", ); }); + + it("forks requester context when fork mode has a parent session", async () => { + const { runtime, runEmbeddedPiAgent, sessionStore } = createAgentRuntime(); + sessionStore["agent:main:main"] = { + sessionId: "parent-session", + sessionFile: "/tmp/parent.jsonl", + totalTokens: 100, + updatedAt: 1, + }; + const resolveParentForkDecision = vi.fn(async () => ({ + status: "fork" as const, + maxTokens: 100_000, + parentTokens: 100, + })); + const forkSessionFromParent = vi.fn(async () => ({ + sessionId: "forked-session", + sessionFile: "/tmp/forked.jsonl", + })); + __setRealtimeVoiceAgentConsultDepsForTest({ + resolveParentForkDecision, + forkSessionFromParent, + }); + + await consultRealtimeVoiceAgent({ + cfg: {} as never, + agentRuntime: runtime as never, + logger: { warn: vi.fn() }, + agentId: "main", + sessionKey: "agent:main:subagent:google-meet:meet-1", + spawnedBy: "agent:main:main", + contextMode: "fork", + messageProvider: "google-meet", + lane: "google-meet", + runIdPrefix: "google-meet:meet-1", + args: { question: "What should I say?" 
}, + transcript: [], + surface: "a private Google Meet", + userLabel: "Participant", + }); + + expect(resolveParentForkDecision).toHaveBeenCalledWith({ + parentEntry: sessionStore["agent:main:main"], + storePath: "/tmp/sessions.json", + }); + expect(forkSessionFromParent).toHaveBeenCalledWith({ + parentEntry: sessionStore["agent:main:main"], + agentId: "main", + sessionsDir: "/tmp", + }); + expect(sessionStore["agent:main:subagent:google-meet:meet-1"]).toMatchObject({ + sessionId: "forked-session", + sessionFile: "/tmp/forked.jsonl", + spawnedBy: "agent:main:main", + forkedFromParent: true, + }); + expect(runEmbeddedPiAgent).toHaveBeenCalledWith( + expect.objectContaining({ + sessionId: "forked-session", + sessionFile: "/tmp/forked.jsonl", + spawnedBy: "agent:main:main", + }), + ); + }); }); diff --git a/src/realtime-voice/agent-consult-runtime.ts b/src/realtime-voice/agent-consult-runtime.ts index a7ff9727463..7d8a415552c 100644 --- a/src/realtime-voice/agent-consult-runtime.ts +++ b/src/realtime-voice/agent-consult-runtime.ts @@ -1,8 +1,14 @@ import { randomUUID } from "node:crypto"; +import path from "node:path"; import type { RunEmbeddedPiAgentParams } from "../agents/pi-embedded-runner/run/params.js"; +import { + forkSessionFromParent, + resolveParentForkDecision, +} from "../auto-reply/reply/session-fork.js"; import type { SessionEntry } from "../config/sessions/types.js"; import type { OpenClawConfig } from "../config/types.openclaw.js"; import type { RuntimeLogger, PluginRuntimeCore } from "../plugins/runtime/types-core.js"; +import { parseAgentSessionKey } from "../routing/session-key.js"; import { buildRealtimeVoiceAgentConsultPrompt, collectRealtimeVoiceAgentConsultVisibleText, @@ -11,11 +17,34 @@ import { export type RealtimeVoiceAgentConsultRuntime = PluginRuntimeCore["agent"]; export type RealtimeVoiceAgentConsultResult = { text: string }; +export type RealtimeVoiceAgentConsultContextMode = "isolated" | "fork"; export { 
resolveRealtimeVoiceAgentConsultTools, resolveRealtimeVoiceAgentConsultToolsAllow, } from "./agent-consult-tool.js"; +type RealtimeVoiceAgentConsultDeps = { + randomUUID: typeof randomUUID; + resolveParentForkDecision: typeof resolveParentForkDecision; + forkSessionFromParent: typeof forkSessionFromParent; +}; + +const defaultRealtimeVoiceAgentConsultDeps: RealtimeVoiceAgentConsultDeps = { + randomUUID, + resolveParentForkDecision, + forkSessionFromParent, +}; + +let realtimeVoiceAgentConsultDeps = defaultRealtimeVoiceAgentConsultDeps; + +export function __setRealtimeVoiceAgentConsultDepsForTest( + deps: Partial | null, +): void { + realtimeVoiceAgentConsultDeps = deps + ? { ...defaultRealtimeVoiceAgentConsultDeps, ...deps } + : defaultRealtimeVoiceAgentConsultDeps; +} + function resolveRealtimeVoiceAgentSandboxSessionKey(agentId: string, sessionKey: string): string { const trimmed = sessionKey.trim(); if (trimmed.toLowerCase().startsWith("agent:")) { @@ -24,6 +53,73 @@ function resolveRealtimeVoiceAgentSandboxSessionKey(agentId: string, sessionKey: return `agent:${agentId}:${trimmed}`; } +async function resolveRealtimeVoiceAgentConsultSessionEntry(params: { + agentId: string; + sessionKey: string; + spawnedBy?: string | null; + contextMode?: RealtimeVoiceAgentConsultContextMode; + storePath: string; + agentRuntime: RealtimeVoiceAgentConsultRuntime; + logger: Pick; +}): Promise { + const now = Date.now(); + return await params.agentRuntime.session.updateSessionStore(params.storePath, async (store) => { + const existing = store[params.sessionKey] as SessionEntry | undefined; + if (existing?.sessionId?.trim()) { + const next: SessionEntry = { ...existing, updatedAt: now }; + store[params.sessionKey] = next; + return next; + } + + const requesterSessionKey = params.spawnedBy?.trim(); + const requesterAgentId = parseAgentSessionKey(requesterSessionKey)?.agentId; + const shouldFork = + params.contextMode === "fork" && + requesterSessionKey && + (!requesterAgentId || 
requesterAgentId === params.agentId); + + if (shouldFork) { + const parentEntry = store[requesterSessionKey] as SessionEntry | undefined; + if (parentEntry?.sessionId?.trim()) { + const decision = await realtimeVoiceAgentConsultDeps.resolveParentForkDecision({ + parentEntry, + storePath: params.storePath, + }); + if (decision.status === "fork") { + const fork = await realtimeVoiceAgentConsultDeps.forkSessionFromParent({ + parentEntry, + agentId: params.agentId, + sessionsDir: path.dirname(params.storePath), + }); + if (fork) { + const next: SessionEntry = { + ...existing, + sessionId: fork.sessionId, + sessionFile: fork.sessionFile, + spawnedBy: requesterSessionKey, + forkedFromParent: true, + updatedAt: now, + }; + store[params.sessionKey] = next; + return next; + } + } else { + params.logger.warn(`[realtime-voice] ${decision.message}`); + } + } + } + + const next: SessionEntry = { + ...existing, + sessionId: realtimeVoiceAgentConsultDeps.randomUUID(), + ...(requesterSessionKey ? { spawnedBy: requesterSessionKey } : {}), + updatedAt: now, + }; + store[params.sessionKey] = next; + return next; + }); +} + export async function consultRealtimeVoiceAgent(params: { cfg: OpenClawConfig; agentRuntime: RealtimeVoiceAgentConsultRuntime; @@ -40,6 +136,7 @@ export async function consultRealtimeVoiceAgent(params: { questionSourceLabel?: string; agentId?: string; spawnedBy?: string | null; + contextMode?: RealtimeVoiceAgentConsultContextMode; provider?: RunEmbeddedPiAgentParams["provider"]; model?: RunEmbeddedPiAgentParams["model"]; thinkLevel?: RunEmbeddedPiAgentParams["thinkLevel"]; @@ -56,13 +153,14 @@ export async function consultRealtimeVoiceAgent(params: { const storePath = params.agentRuntime.session.resolveStorePath(params.cfg.session?.store, { agentId, }); - const now = Date.now(); - const sessionEntry = await params.agentRuntime.session.updateSessionStore(storePath, (store) => { - const existing = store[params.sessionKey] as SessionEntry | undefined; - const 
sessionId = existing?.sessionId?.trim() || randomUUID(); - const next: SessionEntry = { ...existing, sessionId, updatedAt: now }; - store[params.sessionKey] = next; - return next; + const sessionEntry = await resolveRealtimeVoiceAgentConsultSessionEntry({ + agentId, + sessionKey: params.sessionKey, + spawnedBy: params.spawnedBy, + contextMode: params.contextMode, + storePath, + agentRuntime: params.agentRuntime, + logger: params.logger, }); const sessionId = sessionEntry.sessionId;