fix: fork google meet agent context

This commit is contained in:
Peter Steinberger
2026-05-04 07:35:34 +01:00
parent f29aaa2e04
commit deffd11a43
14 changed files with 421 additions and 157 deletions

View File

@@ -58,6 +58,7 @@ Docs: https://docs.openclaw.ai
- Release/beta smoke: resolve the dispatched Telegram beta E2E run from `gh run list` when `gh workflow run` returns no run URL, so the maintainer helper does not fail immediately after dispatch. Thanks @vincentkoc.
- Media/images: keep HEIC/HEIF attachments fail-closed when optional Sharp conversion is unavailable instead of sending originals that still need conversion. Thanks @vincentkoc.
- Google Meet: fork the caller's current agent transcript into agent-mode meeting consultant sessions, so Meet replies inherit the context from the tool call that joined the meeting.
- Telegram/streaming: sanitize tool-progress draft preview backticks before shared compaction, so long backtick-heavy progress text still renders inside the safe code-formatted preview instead of collapsing to an ellipsis.
- UI/chat: remove the unsupported `line-clamp` declaration from the chat queue text rule to eliminate Firefox console noise without changing visible truncation behavior. Thanks @ZanderH-code.
- Agents/Pi: suppress persistence for synthetic mid-turn overflow continuation prompts, so transcript-retry recovery does not write the "continue from transcript" prompt as a new user turn. Thanks @vincentkoc.

View File

@@ -1707,6 +1707,11 @@ Chrome talk-back modes need `BlackHole 2ch` plus either:
audio path and must exit after starting or validating its daemon. This is only
valid for `bidi` because `agent` mode needs direct command-pair access for TTS.
When an agent calls the `google_meet` tool in agent mode, the meeting consultant
session forks the caller's current transcript before answering participant
speech. The Meet session itself remains separate (`agent:<agentId>:subagent:google-meet:<sessionId>`)
so meeting follow-ups do not mutate the caller transcript directly.
For clean duplex audio, route Meet output and Meet microphone through separate
virtual devices or a Loopback-style virtual device graph. A single shared
BlackHole device can echo other participants back into the call.

View File

@@ -1259,6 +1259,32 @@ describe("google-meet plugin", () => {
});
});
// The tool context's session key must override any caller-supplied
// requesterSessionKey so agent-mode Meet sessions fork the correct transcript.
it("passes the caller session key through tool joins for agent context forking", async () => {
  const callerSessionKey = "agent:main:discord:channel:general";
  const { tools } = setup({}, { toolContext: { sessionKey: callerSessionKey } });
  // Capture every params payload forwarded to the gateway stub.
  const captured: unknown[] = [];
  googleMeetPluginTesting.setCallGatewayFromCliForTests(async (_method, _opts, params) => {
    captured.push(params);
    return { ok: true };
  });
  const joinTool = tools[0] as {
    execute: (id: string, params: unknown) => Promise<unknown>;
  };
  // Deliberately pass a wrong requesterSessionKey; the tool context wins.
  await joinTool.execute("id", {
    action: "join",
    url: "https://meet.google.com/abc-defg-hij",
    requesterSessionKey: "agent:main:wrong",
  });
  expect(captured[0]).toMatchObject({
    url: "https://meet.google.com/abc-defg-hij",
    requesterSessionKey: callerSessionKey,
  });
});
it("explains that Twilio joins need dial-in details", async () => {
const { tools } = setup({ defaultTransport: "twilio" });
const tool = tools[0] as {

View File

@@ -741,6 +741,7 @@ export default definePluginEntry({
pin: normalizeOptionalString(params?.pin),
dtmfSequence: normalizeOptionalString(params?.dtmfSequence),
message: normalizeOptionalString(params?.message),
requesterSessionKey: normalizeOptionalString(params?.requesterSessionKey),
});
respond(true, result);
} catch (err) {
@@ -992,6 +993,7 @@ export default definePluginEntry({
pin: normalizeOptionalString(params?.pin),
dtmfSequence: normalizeOptionalString(params?.dtmfSequence),
message: normalizeOptionalString(params?.message),
requesterSessionKey: normalizeOptionalString(params?.requesterSessionKey),
});
respond(true, result);
} catch (err) {
@@ -1018,155 +1020,176 @@ export default definePluginEntry({
},
);
api.registerTool({
name: "google_meet",
label: "Google Meet",
description:
"Join and track Google Meet sessions through Chrome or Twilio. Call setup_status before join/create/test_listen/test_speech; if it reports a Chrome node offline, local audio missing, or missing Twilio dial plan, surface that blocker instead of retrying or switching transports. Twilio cannot dial a Meet URL directly: provide dialInNumber plus optional pin/dtmfSequence, or configure twilio.defaultDialInNumber. Offline nodes are diagnostics only, not usable candidates. If local Chrome talk-back audio is unsupported on this OS, use mode=transcribe, transport=twilio, or a macOS chrome-node for agent/bidi Chrome. If a Meet tab is already open after a timeout, call recover_current_tab before retrying join to report login, permission, or admission blockers without opening another tab.",
parameters: GoogleMeetToolSchema,
async execute(_toolCallId, params) {
const raw = asParamRecord(params);
try {
assertGoogleMeetAgentToolActionSupported({ config, raw });
switch (raw.action) {
case "join": {
return json(await callGoogleMeetGatewayFromTool({ config, action: "join", raw }));
}
case "create": {
return json(await callGoogleMeetGatewayFromTool({ config, action: "create", raw }));
}
case "test_speech": {
return json(
await callGoogleMeetGatewayFromTool({ config, action: "test_speech", raw }),
);
}
case "test_listen": {
return json(
await callGoogleMeetGatewayFromTool({ config, action: "test_listen", raw }),
);
}
case "status": {
return json(await callGoogleMeetGatewayFromTool({ config, action: "status", raw }));
}
case "recover_current_tab": {
return json(
await callGoogleMeetGatewayFromTool({
config,
action: "recover_current_tab",
raw,
}),
);
}
case "setup_status": {
return json(
await callGoogleMeetGatewayFromTool({ config, action: "setup_status", raw }),
);
}
case "resolve_space": {
const { token: _token, ...result } = await resolveSpaceFromParams(config, raw);
return json(result);
}
case "preflight": {
const { meeting, token, space } = await resolveSpaceFromParams(config, raw);
return json(
buildGoogleMeetPreflightReport({
input: meeting,
space,
previewAcknowledged: config.preview.enrollmentAcknowledged,
tokenSource: token.refreshed ? "refresh-token" : "cached-access-token",
}),
);
}
case "latest": {
const token = await resolveGoogleMeetTokenFromParams(config, raw);
const resolved = await resolveMeetingFromParams({
config,
raw,
accessToken: token.accessToken,
});
return json({
...(await fetchLatestGoogleMeetConferenceRecord({
accessToken: token.accessToken,
meeting: resolved.meeting,
})),
...(resolved.calendarEvent ? { calendarEvent: resolved.calendarEvent } : {}),
});
}
case "calendar_events": {
const token = await resolveGoogleMeetTokenFromParams(config, raw);
const window = raw.today === true ? buildGoogleMeetCalendarDayWindow() : {};
return json(
await listGoogleMeetCalendarEvents({
accessToken: token.accessToken,
calendarId: normalizeOptionalString(raw.calendarId),
eventQuery: normalizeOptionalString(raw.event),
...window,
}),
);
}
case "artifacts": {
const resolved = await resolveArtifactQueryFromParams(config, raw);
return json(
await fetchGoogleMeetArtifacts({
accessToken: resolved.token.accessToken,
meeting: resolved.meeting,
conferenceRecord: resolved.conferenceRecord,
pageSize: resolved.pageSize,
includeTranscriptEntries: resolved.includeTranscriptEntries,
includeDocumentBodies: resolved.includeDocumentBodies,
allConferenceRecords: resolved.allConferenceRecords,
}),
);
}
case "attendance": {
const resolved = await resolveArtifactQueryFromParams(config, raw);
return json(
await fetchGoogleMeetAttendance({
accessToken: resolved.token.accessToken,
meeting: resolved.meeting,
conferenceRecord: resolved.conferenceRecord,
pageSize: resolved.pageSize,
allConferenceRecords: resolved.allConferenceRecords,
mergeDuplicateParticipants: resolved.mergeDuplicateParticipants,
lateAfterMinutes: resolved.lateAfterMinutes,
earlyBeforeMinutes: resolved.earlyBeforeMinutes,
}),
);
}
case "export": {
return json(await exportGoogleMeetBundleFromParams(config, raw));
}
case "leave": {
const sessionId = normalizeOptionalString(raw.sessionId);
if (!sessionId) {
throw new Error("sessionId required");
api.registerTool(
(toolContext) => ({
name: "google_meet",
label: "Google Meet",
description:
"Join and track Google Meet sessions through Chrome or Twilio. Call setup_status before join/create/test_listen/test_speech; if it reports a Chrome node offline, local audio missing, or missing Twilio dial plan, surface that blocker instead of retrying or switching transports. Twilio cannot dial a Meet URL directly: provide dialInNumber plus optional pin/dtmfSequence, or configure twilio.defaultDialInNumber. Offline nodes are diagnostics only, not usable candidates. If local Chrome talk-back audio is unsupported on this OS, use mode=transcribe, transport=twilio, or a macOS chrome-node for agent/bidi Chrome. If a Meet tab is already open after a timeout, call recover_current_tab before retrying join to report login, permission, or admission blockers without opening another tab.",
parameters: GoogleMeetToolSchema,
async execute(_toolCallId, params) {
const raw = asParamRecord(params);
const requesterSessionKey = normalizeOptionalString(toolContext.sessionKey);
const rawWithRequester = requesterSessionKey ? { ...raw, requesterSessionKey } : raw;
try {
assertGoogleMeetAgentToolActionSupported({ config, raw });
switch (raw.action) {
case "join": {
return json(
await callGoogleMeetGatewayFromTool({
config,
action: "join",
raw: rawWithRequester,
}),
);
}
return json(await callGoogleMeetGatewayFromTool({ config, action: "leave", raw }));
}
case "end_active_conference": {
return json(
await callGoogleMeetGatewayFromTool({
config,
action: "end_active_conference",
raw,
}),
);
}
case "speak": {
const sessionId = normalizeOptionalString(raw.sessionId);
if (!sessionId) {
throw new Error("sessionId required");
case "create": {
return json(
await callGoogleMeetGatewayFromTool({
config,
action: "create",
raw: rawWithRequester,
}),
);
}
return json(await callGoogleMeetGatewayFromTool({ config, action: "speak", raw }));
case "test_speech": {
return json(
await callGoogleMeetGatewayFromTool({
config,
action: "test_speech",
raw: rawWithRequester,
}),
);
}
case "test_listen": {
return json(
await callGoogleMeetGatewayFromTool({ config, action: "test_listen", raw }),
);
}
case "status": {
return json(await callGoogleMeetGatewayFromTool({ config, action: "status", raw }));
}
case "recover_current_tab": {
return json(
await callGoogleMeetGatewayFromTool({
config,
action: "recover_current_tab",
raw,
}),
);
}
case "setup_status": {
return json(
await callGoogleMeetGatewayFromTool({ config, action: "setup_status", raw }),
);
}
case "resolve_space": {
const { token: _token, ...result } = await resolveSpaceFromParams(config, raw);
return json(result);
}
case "preflight": {
const { meeting, token, space } = await resolveSpaceFromParams(config, raw);
return json(
buildGoogleMeetPreflightReport({
input: meeting,
space,
previewAcknowledged: config.preview.enrollmentAcknowledged,
tokenSource: token.refreshed ? "refresh-token" : "cached-access-token",
}),
);
}
case "latest": {
const token = await resolveGoogleMeetTokenFromParams(config, raw);
const resolved = await resolveMeetingFromParams({
config,
raw,
accessToken: token.accessToken,
});
return json({
...(await fetchLatestGoogleMeetConferenceRecord({
accessToken: token.accessToken,
meeting: resolved.meeting,
})),
...(resolved.calendarEvent ? { calendarEvent: resolved.calendarEvent } : {}),
});
}
case "calendar_events": {
const token = await resolveGoogleMeetTokenFromParams(config, raw);
const window = raw.today === true ? buildGoogleMeetCalendarDayWindow() : {};
return json(
await listGoogleMeetCalendarEvents({
accessToken: token.accessToken,
calendarId: normalizeOptionalString(raw.calendarId),
eventQuery: normalizeOptionalString(raw.event),
...window,
}),
);
}
case "artifacts": {
const resolved = await resolveArtifactQueryFromParams(config, raw);
return json(
await fetchGoogleMeetArtifacts({
accessToken: resolved.token.accessToken,
meeting: resolved.meeting,
conferenceRecord: resolved.conferenceRecord,
pageSize: resolved.pageSize,
includeTranscriptEntries: resolved.includeTranscriptEntries,
includeDocumentBodies: resolved.includeDocumentBodies,
allConferenceRecords: resolved.allConferenceRecords,
}),
);
}
case "attendance": {
const resolved = await resolveArtifactQueryFromParams(config, raw);
return json(
await fetchGoogleMeetAttendance({
accessToken: resolved.token.accessToken,
meeting: resolved.meeting,
conferenceRecord: resolved.conferenceRecord,
pageSize: resolved.pageSize,
allConferenceRecords: resolved.allConferenceRecords,
mergeDuplicateParticipants: resolved.mergeDuplicateParticipants,
lateAfterMinutes: resolved.lateAfterMinutes,
earlyBeforeMinutes: resolved.earlyBeforeMinutes,
}),
);
}
case "export": {
return json(await exportGoogleMeetBundleFromParams(config, raw));
}
case "leave": {
const sessionId = normalizeOptionalString(raw.sessionId);
if (!sessionId) {
throw new Error("sessionId required");
}
return json(await callGoogleMeetGatewayFromTool({ config, action: "leave", raw }));
}
case "end_active_conference": {
return json(
await callGoogleMeetGatewayFromTool({
config,
action: "end_active_conference",
raw,
}),
);
}
case "speak": {
const sessionId = normalizeOptionalString(raw.sessionId);
if (!sessionId) {
throw new Error("sessionId required");
}
return json(await callGoogleMeetGatewayFromTool({ config, action: "speak", raw }));
}
default:
throw new Error("unknown google_meet action");
}
default:
throw new Error("unknown google_meet action");
} catch (err) {
return json(formatGatewayError(err));
}
} catch (err) {
return json(formatGatewayError(err));
}
},
});
},
}),
{ name: "google_meet" },
);
api.registerNodeHostCommand({
command: "googlemeet.chrome",

View File

@@ -10,6 +10,7 @@ import {
type RealtimeVoiceTool,
} from "openclaw/plugin-sdk/realtime-voice";
import { normalizeAgentId } from "openclaw/plugin-sdk/routing";
import { normalizeOptionalString } from "openclaw/plugin-sdk/text-runtime";
import type { GoogleMeetConfig, GoogleMeetToolPolicy } from "./config.js";
export const GOOGLE_MEET_AGENT_CONSULT_TOOL_NAME = REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME;
@@ -44,11 +45,13 @@ export async function consultOpenClawAgentForGoogleMeet(params: {
runtime: PluginRuntime;
logger: RuntimeLogger;
meetingSessionId: string;
requesterSessionKey?: string;
args: unknown;
transcript: Array<{ role: "user" | "assistant"; text: string }>;
}): Promise<{ text: string }> {
const agentId = normalizeAgentId(params.config.realtime.agentId);
const requesterSessionKey = `agent:${agentId}:main`;
const requesterSessionKey =
normalizeOptionalString(params.requesterSessionKey) ?? `agent:${agentId}:main`;
const sessionKey = `agent:${agentId}:subagent:google-meet:${params.meetingSessionId}`;
return await consultRealtimeVoiceAgent({
cfg: params.fullConfig,
@@ -60,6 +63,7 @@ export async function consultOpenClawAgentForGoogleMeet(params: {
lane: "google-meet",
runIdPrefix: `google-meet:${params.meetingSessionId}`,
spawnedBy: requesterSessionKey,
contextMode: "fork",
args: params.args,
transcript: params.transcript,
surface: "a private Google Meet",

View File

@@ -146,6 +146,7 @@ export async function createAndJoinMeetFromParams(params: {
pin: normalizeOptionalString(params.raw.pin),
dtmfSequence: normalizeOptionalString(params.raw.dtmfSequence),
message: normalizeOptionalString(params.raw.message),
requesterSessionKey: normalizeOptionalString(params.raw.requesterSessionKey),
});
return {
...created,

View File

@@ -76,6 +76,7 @@ export async function startNodeAgentAudioBridge(params: {
fullConfig: OpenClawConfig;
runtime: PluginRuntime;
meetingSessionId: string;
requesterSessionKey?: string;
nodeId: string;
bridgeId: string;
logger: RuntimeLogger;
@@ -225,6 +226,7 @@ export async function startNodeAgentAudioBridge(params: {
runtime: params.runtime,
logger: params.logger,
meetingSessionId: params.meetingSessionId,
requesterSessionKey: params.requesterSessionKey,
args: {
question: currentQuestion,
responseStyle: "Brief, natural spoken answer for a live meeting.",
@@ -373,6 +375,7 @@ export async function startNodeRealtimeAudioBridge(params: {
fullConfig: OpenClawConfig;
runtime: PluginRuntime;
meetingSessionId: string;
requesterSessionKey?: string;
nodeId: string;
bridgeId: string;
logger: RuntimeLogger;
@@ -457,6 +460,7 @@ export async function startNodeRealtimeAudioBridge(params: {
runtime: params.runtime,
logger: params.logger,
meetingSessionId: params.meetingSessionId,
requesterSessionKey: params.requesterSessionKey,
args: {
question: currentQuestion,
responseStyle: "Brief, natural spoken answer for a live meeting.",
@@ -634,6 +638,7 @@ export async function startNodeRealtimeAudioBridge(params: {
runtime: params.runtime,
logger: params.logger,
meetingSessionId: params.meetingSessionId,
requesterSessionKey: params.requesterSessionKey,
args: event.args,
transcript,
})

View File

@@ -513,6 +513,7 @@ export async function startCommandAgentAudioBridge(params: {
fullConfig: OpenClawConfig;
runtime: PluginRuntime;
meetingSessionId: string;
requesterSessionKey?: string;
inputCommand: string[];
outputCommand: string[];
logger: RuntimeLogger;
@@ -711,6 +712,7 @@ export async function startCommandAgentAudioBridge(params: {
runtime: params.runtime,
logger: params.logger,
meetingSessionId: params.meetingSessionId,
requesterSessionKey: params.requesterSessionKey,
args: {
question: currentQuestion,
responseStyle: "Brief, natural spoken answer for a live meeting.",
@@ -822,6 +824,7 @@ export async function startCommandRealtimeAudioBridge(params: {
fullConfig: OpenClawConfig;
runtime: PluginRuntime;
meetingSessionId: string;
requesterSessionKey?: string;
inputCommand: string[];
outputCommand: string[];
logger: RuntimeLogger;
@@ -1108,6 +1111,7 @@ export async function startCommandRealtimeAudioBridge(params: {
runtime: params.runtime,
logger: params.logger,
meetingSessionId: params.meetingSessionId,
requesterSessionKey: params.requesterSessionKey,
args: {
question: currentQuestion,
responseStyle: "Brief, natural spoken answer for a live meeting.",
@@ -1208,6 +1212,7 @@ export async function startCommandRealtimeAudioBridge(params: {
runtime: params.runtime,
logger: params.logger,
meetingSessionId: params.meetingSessionId,
requesterSessionKey: params.requesterSessionKey,
args: event.args,
transcript,
})

View File

@@ -426,6 +426,7 @@ export class GoogleMeetRuntime {
config: this.params.config,
fullConfig: this.params.fullConfig,
meetingSessionId: session.id,
requesterSessionKey: request.requesterSessionKey,
mode,
url,
logger: this.params.logger,
@@ -435,6 +436,7 @@ export class GoogleMeetRuntime {
config: this.params.config,
fullConfig: this.params.fullConfig,
meetingSessionId: session.id,
requesterSessionKey: request.requesterSessionKey,
mode,
url,
logger: this.params.logger,

View File

@@ -61,6 +61,7 @@ export function setupGoogleMeetPlugin(
options?: { timeoutMs?: number },
) => Promise<CommandResult>;
registerPlatform?: NodeJS.Platform;
toolContext?: Record<string, unknown>;
} = {},
) {
const methods = new Map<string, unknown>();
@@ -154,7 +155,13 @@ export function setupGoogleMeetPlugin(
} as unknown as OpenClawPluginApi["runtime"],
logger: noopLogger,
registerGatewayMethod: (method: string, handler: unknown) => methods.set(method, handler),
registerTool: (tool: unknown) => tools.push(tool),
// Tools may be registered as plain definitions or as factories that take a
// tool context; invoke factories with the configured test context first.
registerTool: (tool: unknown) => {
  const resolved =
    typeof tool === "function"
      ? (tool as (ctx: Record<string, unknown>) => unknown)(options.toolContext ?? {})
      : tool;
  tools.push(resolved);
},
registerCli: (_registrar: unknown, opts: unknown) => cliRegistrations.push(opts),
registerNodeHostCommand: (command: unknown) => nodeHostCommands.push(command),
});

View File

@@ -92,6 +92,7 @@ export async function launchChromeMeet(params: {
config: GoogleMeetConfig;
fullConfig: OpenClawConfig;
meetingSessionId: string;
requesterSessionKey?: string;
mode: GoogleMeetMode;
url: string;
logger: RuntimeLogger;
@@ -162,6 +163,7 @@ export async function launchChromeMeet(params: {
fullConfig: params.fullConfig,
runtime: params.runtime,
meetingSessionId: params.meetingSessionId,
requesterSessionKey: params.requesterSessionKey,
inputCommand: params.config.chrome.audioInputCommand,
outputCommand: params.config.chrome.audioOutputCommand,
logger: params.logger,
@@ -174,6 +176,7 @@ export async function launchChromeMeet(params: {
fullConfig: params.fullConfig,
runtime: params.runtime,
meetingSessionId: params.meetingSessionId,
requesterSessionKey: params.requesterSessionKey,
inputCommand: params.config.chrome.audioInputCommand,
outputCommand: params.config.chrome.audioOutputCommand,
logger: params.logger,
@@ -950,6 +953,7 @@ export async function launchChromeMeetOnNode(params: {
config: GoogleMeetConfig;
fullConfig: OpenClawConfig;
meetingSessionId: string;
requesterSessionKey?: string;
mode: GoogleMeetMode;
url: string;
logger: RuntimeLogger;
@@ -1025,6 +1029,7 @@ export async function launchChromeMeetOnNode(params: {
fullConfig: params.fullConfig,
runtime: params.runtime,
meetingSessionId: params.meetingSessionId,
requesterSessionKey: params.requesterSessionKey,
nodeId,
bridgeId: result.bridgeId,
logger: params.logger,

View File

@@ -7,6 +7,7 @@ export type GoogleMeetJoinRequest = {
transport?: GoogleMeetTransport;
mode?: GoogleMeetModeInput;
message?: string;
requesterSessionKey?: string;
timeoutMs?: number;
dialInNumber?: string;
pin?: string;

View File

@@ -1,5 +1,6 @@
import { describe, expect, it, vi } from "vitest";
import { afterEach, describe, expect, it, vi } from "vitest";
import {
__setRealtimeVoiceAgentConsultDepsForTest,
consultRealtimeVoiceAgent,
resolveRealtimeVoiceAgentConsultTools,
resolveRealtimeVoiceAgentConsultToolsAllow,
@@ -7,7 +8,17 @@ import {
import { REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME } from "./agent-consult-tool.js";
function createAgentRuntime(payloads: unknown[] = [{ text: "Speak this." }]) {
const sessionStore: Record<string, { sessionId?: string; updatedAt?: number }> = {};
const sessionStore: Record<
string,
{
sessionId?: string;
updatedAt?: number;
sessionFile?: string;
spawnedBy?: string;
forkedFromParent?: boolean;
totalTokens?: number;
}
> = {};
const runEmbeddedPiAgent = vi.fn(async () => ({
payloads,
meta: {},
@@ -31,7 +42,10 @@ function createAgentRuntime(payloads: unknown[] = [{ text: "Speak this." }]) {
loadSessionStore: vi.fn(() => sessionStore),
saveSessionStore: vi.fn(async () => {}),
updateSessionStore,
resolveSessionFilePath: vi.fn(() => "/tmp/session.json"),
resolveSessionFilePath: vi.fn(
(_sessionId: string, entry?: { sessionFile?: string }) =>
entry?.sessionFile ?? "/tmp/session.json",
),
},
runEmbeddedPiAgent,
},
@@ -41,6 +55,10 @@ function createAgentRuntime(payloads: unknown[] = [{ text: "Speak this." }]) {
}
describe("realtime voice agent consult runtime", () => {
afterEach(() => {
__setRealtimeVoiceAgentConsultDepsForTest(null);
});
it("exposes the shared consult tool based on policy", () => {
expect(resolveRealtimeVoiceAgentConsultTools("safe-read-only")).toEqual([
expect.objectContaining({ name: REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME }),
@@ -151,4 +169,67 @@ describe("realtime voice agent consult runtime", () => {
"[realtime-voice] agent consult produced no answer: agent returned no speakable text",
);
});
// End-to-end check of fork mode: a seeded parent session must be forked via
// the injected deps, recorded in the store, and handed to the embedded agent.
it("forks requester context when fork mode has a parent session", async () => {
  const { runtime, runEmbeddedPiAgent, sessionStore } = createAgentRuntime();
  // Seed the parent session the consult call is expected to fork from.
  sessionStore["agent:main:main"] = {
    sessionId: "parent-session",
    sessionFile: "/tmp/parent.jsonl",
    totalTokens: 100,
    updatedAt: 1,
  };
  const forkDecision = vi.fn(async () => ({
    status: "fork" as const,
    maxTokens: 100_000,
    parentTokens: 100,
  }));
  const forkFromParent = vi.fn(async () => ({
    sessionId: "forked-session",
    sessionFile: "/tmp/forked.jsonl",
  }));
  __setRealtimeVoiceAgentConsultDepsForTest({
    resolveParentForkDecision: forkDecision,
    forkSessionFromParent: forkFromParent,
  });
  await consultRealtimeVoiceAgent({
    cfg: {} as never,
    agentRuntime: runtime as never,
    logger: { warn: vi.fn() },
    agentId: "main",
    sessionKey: "agent:main:subagent:google-meet:meet-1",
    spawnedBy: "agent:main:main",
    contextMode: "fork",
    messageProvider: "google-meet",
    lane: "google-meet",
    runIdPrefix: "google-meet:meet-1",
    args: { question: "What should I say?" },
    transcript: [],
    surface: "a private Google Meet",
    userLabel: "Participant",
  });
  const parentEntry = sessionStore["agent:main:main"];
  expect(forkDecision).toHaveBeenCalledWith({
    parentEntry,
    storePath: "/tmp/sessions.json",
  });
  expect(forkFromParent).toHaveBeenCalledWith({
    parentEntry,
    agentId: "main",
    sessionsDir: "/tmp",
  });
  // The subagent session entry records the fork result and its provenance.
  expect(sessionStore["agent:main:subagent:google-meet:meet-1"]).toMatchObject({
    sessionId: "forked-session",
    sessionFile: "/tmp/forked.jsonl",
    spawnedBy: "agent:main:main",
    forkedFromParent: true,
  });
  // The embedded agent runs against the forked session, not a fresh one.
  expect(runEmbeddedPiAgent).toHaveBeenCalledWith(
    expect.objectContaining({
      sessionId: "forked-session",
      sessionFile: "/tmp/forked.jsonl",
      spawnedBy: "agent:main:main",
    }),
  );
});
});

View File

@@ -1,8 +1,14 @@
import { randomUUID } from "node:crypto";
import path from "node:path";
import type { RunEmbeddedPiAgentParams } from "../agents/pi-embedded-runner/run/params.js";
import {
forkSessionFromParent,
resolveParentForkDecision,
} from "../auto-reply/reply/session-fork.js";
import type { SessionEntry } from "../config/sessions/types.js";
import type { OpenClawConfig } from "../config/types.openclaw.js";
import type { RuntimeLogger, PluginRuntimeCore } from "../plugins/runtime/types-core.js";
import { parseAgentSessionKey } from "../routing/session-key.js";
import {
buildRealtimeVoiceAgentConsultPrompt,
collectRealtimeVoiceAgentConsultVisibleText,
@@ -11,11 +17,34 @@ import {
export type RealtimeVoiceAgentConsultRuntime = PluginRuntimeCore["agent"];
export type RealtimeVoiceAgentConsultResult = { text: string };
export type RealtimeVoiceAgentConsultContextMode = "isolated" | "fork";
export {
resolveRealtimeVoiceAgentConsultTools,
resolveRealtimeVoiceAgentConsultToolsAllow,
} from "./agent-consult-tool.js";
// Injectable dependencies for the consult runtime. Kept behind a mutable
// module-level binding so tests can substitute the fork helpers without
// module mocking (see __setRealtimeVoiceAgentConsultDepsForTest below).
type RealtimeVoiceAgentConsultDeps = {
  randomUUID: typeof randomUUID;
  resolveParentForkDecision: typeof resolveParentForkDecision;
  forkSessionFromParent: typeof forkSessionFromParent;
};
// Production wiring: the real crypto UUID and session-fork helpers.
const defaultRealtimeVoiceAgentConsultDeps: RealtimeVoiceAgentConsultDeps = {
  randomUUID,
  resolveParentForkDecision,
  forkSessionFromParent,
};
// Current deps in effect; reassigned only by the test seam below.
let realtimeVoiceAgentConsultDeps = defaultRealtimeVoiceAgentConsultDeps;
/**
 * Test seam: override any subset of the consult runtime's injectable
 * dependencies. Pass null to restore the production defaults.
 */
export function __setRealtimeVoiceAgentConsultDepsForTest(
  deps: Partial<RealtimeVoiceAgentConsultDeps> | null,
): void {
  if (deps) {
    // Merge overrides over the defaults so partial substitution works.
    realtimeVoiceAgentConsultDeps = { ...defaultRealtimeVoiceAgentConsultDeps, ...deps };
  } else {
    realtimeVoiceAgentConsultDeps = defaultRealtimeVoiceAgentConsultDeps;
  }
}
function resolveRealtimeVoiceAgentSandboxSessionKey(agentId: string, sessionKey: string): string {
const trimmed = sessionKey.trim();
if (trimmed.toLowerCase().startsWith("agent:")) {
@@ -24,6 +53,73 @@ function resolveRealtimeVoiceAgentSandboxSessionKey(agentId: string, sessionKey:
return `agent:${agentId}:${trimmed}`;
}
/**
 * Resolve (or lazily create) the session entry for a realtime-voice consult.
 *
 * Resolution order, all inside one atomic updateSessionStore transaction:
 *   1. Reuse an existing entry that already has a sessionId.
 *   2. In "fork" context mode, try to fork the requester's parent session so
 *      the consult inherits the caller's transcript context.
 *   3. Otherwise (or on any fork fallback) mint a fresh isolated session.
 *
 * Forking is only attempted when the requester key parses to the same agent
 * (or carries no agent id), the parent entry has a sessionId, and the fork
 * decision helper approves. A declined decision is logged as a warning;
 * a declined decision or a falsy fork result falls through to case 3.
 */
async function resolveRealtimeVoiceAgentConsultSessionEntry(params: {
  agentId: string;
  sessionKey: string;
  spawnedBy?: string | null;
  contextMode?: RealtimeVoiceAgentConsultContextMode;
  storePath: string;
  agentRuntime: RealtimeVoiceAgentConsultRuntime;
  logger: Pick<RuntimeLogger, "warn">;
}): Promise<SessionEntry> {
  const now = Date.now();
  return await params.agentRuntime.session.updateSessionStore(params.storePath, async (store) => {
    const existing = store[params.sessionKey] as SessionEntry | undefined;
    // Case 1: an established session just gets its timestamp refreshed.
    if (existing?.sessionId?.trim()) {
      const next: SessionEntry = { ...existing, updatedAt: now };
      store[params.sessionKey] = next;
      return next;
    }
    const requesterSessionKey = params.spawnedBy?.trim();
    const requesterAgentId = parseAgentSessionKey(requesterSessionKey)?.agentId;
    // Only fork within the same agent (cross-agent transcripts must not leak).
    const shouldFork =
      params.contextMode === "fork" &&
      requesterSessionKey &&
      (!requesterAgentId || requesterAgentId === params.agentId);
    if (shouldFork) {
      const parentEntry = store[requesterSessionKey] as SessionEntry | undefined;
      if (parentEntry?.sessionId?.trim()) {
        const decision = await realtimeVoiceAgentConsultDeps.resolveParentForkDecision({
          parentEntry,
          storePath: params.storePath,
        });
        if (decision.status === "fork") {
          const fork = await realtimeVoiceAgentConsultDeps.forkSessionFromParent({
            parentEntry,
            agentId: params.agentId,
            // Forked session files live next to the store file.
            sessionsDir: path.dirname(params.storePath),
          });
          if (fork) {
            // Case 2: record the forked session with its provenance.
            const next: SessionEntry = {
              ...existing,
              sessionId: fork.sessionId,
              sessionFile: fork.sessionFile,
              spawnedBy: requesterSessionKey,
              forkedFromParent: true,
              updatedAt: now,
            };
            store[params.sessionKey] = next;
            return next;
          }
        } else {
          // Decision declined the fork (e.g. parent too large); surface why.
          params.logger.warn(`[realtime-voice] ${decision.message}`);
        }
      }
    }
    // Case 3: fresh isolated session; still tag the requester when known.
    const next: SessionEntry = {
      ...existing,
      sessionId: realtimeVoiceAgentConsultDeps.randomUUID(),
      ...(requesterSessionKey ? { spawnedBy: requesterSessionKey } : {}),
      updatedAt: now,
    };
    store[params.sessionKey] = next;
    return next;
  });
}
export async function consultRealtimeVoiceAgent(params: {
cfg: OpenClawConfig;
agentRuntime: RealtimeVoiceAgentConsultRuntime;
@@ -40,6 +136,7 @@ export async function consultRealtimeVoiceAgent(params: {
questionSourceLabel?: string;
agentId?: string;
spawnedBy?: string | null;
contextMode?: RealtimeVoiceAgentConsultContextMode;
provider?: RunEmbeddedPiAgentParams["provider"];
model?: RunEmbeddedPiAgentParams["model"];
thinkLevel?: RunEmbeddedPiAgentParams["thinkLevel"];
@@ -56,13 +153,14 @@ export async function consultRealtimeVoiceAgent(params: {
const storePath = params.agentRuntime.session.resolveStorePath(params.cfg.session?.store, {
agentId,
});
const now = Date.now();
const sessionEntry = await params.agentRuntime.session.updateSessionStore(storePath, (store) => {
const existing = store[params.sessionKey] as SessionEntry | undefined;
const sessionId = existing?.sessionId?.trim() || randomUUID();
const next: SessionEntry = { ...existing, sessionId, updatedAt: now };
store[params.sessionKey] = next;
return next;
const sessionEntry = await resolveRealtimeVoiceAgentConsultSessionEntry({
agentId,
sessionKey: params.sessionKey,
spawnedBy: params.spawnedBy,
contextMode: params.contextMode,
storePath,
agentRuntime: params.agentRuntime,
logger: params.logger,
});
const sessionId = sessionEntry.sessionId;