mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 09:10:45 +00:00
fix: continue Google Live consult responses (#72189) (thanks @VACInc)
Co-authored-by: VACInc <3279061+VACInc@users.noreply.github.com>
This commit is contained in:
@@ -85,6 +85,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Google Meet: clear queued Gemini Live playback when realtime interruptions arrive, restart Chrome command-pair audio output after clears, and expose Google Live interruption/VAD config knobs for Meet and Voice Call realtime bridges. Fixes #72523. (#72524) Thanks @BsnizND.
|
||||
- Google Meet: add `realtime.agentId` so live meeting consults can target a named OpenClaw agent instead of always using `main`. (#72381) Thanks @BsnizND.
|
||||
- Google Meet: route stateful `google_meet` tool actions through the gateway-owned runtime so created or joined realtime sessions remain visible to status, speak, and leave after the agent turn ends. Fixes #72440. (#72441) Thanks @BsnizND.
|
||||
- Google Meet/Voice Call: send Gemini Live a non-blocking consult continuation before long OpenClaw agent consults finish, then deliver the final result when idle so calls and meetings do not sit silent during tool-backed answers. (#72189) Thanks @VACInc.
|
||||
- Google Meet: preserve Gemini Live function names when replying to realtime tool calls so Google SDK validation accepts the `FunctionResponse` payload. Fixes #72425. (#72426) Thanks @BsnizND.
|
||||
- Matrix/E2EE: stabilize recovery and broken-device QA flows while avoiding Matrix device-cleanup sync races that could leave shutdown-time crypto work running. Thanks @gumadeiras.
|
||||
- Cron: apply `cron.maxConcurrentRuns` to a dedicated `cron-nested` isolated agent-turn lane as well as cron dispatch, so parallel cron jobs no longer serialize on inner LLM execution while non-cron nested flows keep their existing lane behavior. Fixes #72707. Thanks @kagura-agent.
|
||||
|
||||
@@ -585,6 +585,10 @@ API key auth, and dynamic model resolution.
|
||||
label: "Acme Realtime Voice",
|
||||
isConfigured: ({ providerConfig }) => Boolean(providerConfig.apiKey),
|
||||
createBridge: (req) => ({
|
||||
// Set this only if the provider accepts multiple tool responses for
|
||||
// one call, for example an immediate "working" response followed by
|
||||
// the final result.
|
||||
supportsToolResultContinuation: false,
|
||||
connect: async () => {},
|
||||
sendAudio: () => {},
|
||||
setMediaTimestamp: () => {},
|
||||
|
||||
@@ -1944,6 +1944,7 @@ describe("google-meet plugin", () => {
|
||||
| undefined;
|
||||
const sendAudio = vi.fn();
|
||||
const bridge = {
|
||||
supportsToolResultContinuation: true,
|
||||
connect: vi.fn(async () => {}),
|
||||
sendAudio,
|
||||
setMediaTimestamp: vi.fn(),
|
||||
@@ -2048,6 +2049,15 @@ describe("google-meet plugin", () => {
|
||||
name: "openclaw_agent_consult",
|
||||
args: { question: "What should I say about launch timing?" },
|
||||
});
|
||||
expect(bridge.submitToolResult).toHaveBeenNthCalledWith(
|
||||
1,
|
||||
"tool-call-1",
|
||||
expect.objectContaining({
|
||||
status: "working",
|
||||
tool: "openclaw_agent_consult",
|
||||
}),
|
||||
{ willContinue: true },
|
||||
);
|
||||
|
||||
expect(spawnMock).toHaveBeenNthCalledWith(1, "play-meet", [], {
|
||||
stdio: ["pipe", "ignore", "pipe"],
|
||||
@@ -2082,9 +2092,13 @@ describe("google-meet plugin", () => {
|
||||
],
|
||||
});
|
||||
await vi.waitFor(() => {
|
||||
expect(bridge.submitToolResult).toHaveBeenCalledWith("tool-call-1", {
|
||||
text: "Use the Portugal launch data.",
|
||||
});
|
||||
expect(bridge.submitToolResult).toHaveBeenLastCalledWith(
|
||||
"tool-call-1",
|
||||
{
|
||||
text: "Use the Portugal launch data.",
|
||||
},
|
||||
undefined,
|
||||
);
|
||||
});
|
||||
expect(runtime.agent.runEmbeddedPiAgent).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
@@ -2121,6 +2135,7 @@ describe("google-meet plugin", () => {
|
||||
| undefined;
|
||||
const sendAudio = vi.fn();
|
||||
const bridge = {
|
||||
supportsToolResultContinuation: true,
|
||||
connect: vi.fn(async () => {}),
|
||||
sendAudio,
|
||||
setMediaTimestamp: vi.fn(),
|
||||
@@ -2196,6 +2211,15 @@ describe("google-meet plugin", () => {
|
||||
name: "openclaw_agent_consult",
|
||||
args: { question: "What should I say?" },
|
||||
});
|
||||
expect(bridge.submitToolResult).toHaveBeenNthCalledWith(
|
||||
1,
|
||||
"tool-call-1",
|
||||
expect.objectContaining({
|
||||
status: "working",
|
||||
tool: "openclaw_agent_consult",
|
||||
}),
|
||||
{ willContinue: true },
|
||||
);
|
||||
|
||||
await vi.waitFor(() => {
|
||||
expect(sendAudio).toHaveBeenCalledWith(Buffer.from([9, 8, 7]));
|
||||
@@ -2227,9 +2251,13 @@ describe("google-meet plugin", () => {
|
||||
);
|
||||
});
|
||||
await vi.waitFor(() => {
|
||||
expect(bridge.submitToolResult).toHaveBeenCalledWith("tool-call-1", {
|
||||
text: "Use the launch update.",
|
||||
});
|
||||
expect(bridge.submitToolResult).toHaveBeenLastCalledWith(
|
||||
"tool-call-1",
|
||||
{
|
||||
text: "Use the launch update.",
|
||||
},
|
||||
undefined,
|
||||
);
|
||||
});
|
||||
expect(bridge.triggerGreeting).not.toHaveBeenCalled();
|
||||
handle.speak("Say exactly: hello from the node.");
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime";
|
||||
import type { PluginRuntime, RuntimeLogger } from "openclaw/plugin-sdk/plugin-runtime";
|
||||
import {
|
||||
buildRealtimeVoiceAgentConsultWorkingResponse,
|
||||
consultRealtimeVoiceAgent,
|
||||
REALTIME_VOICE_AGENT_CONSULT_TOOL,
|
||||
REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME,
|
||||
resolveRealtimeVoiceAgentConsultTools,
|
||||
resolveRealtimeVoiceAgentConsultToolsAllow,
|
||||
type RealtimeVoiceBridgeSession,
|
||||
type RealtimeVoiceTool,
|
||||
} from "openclaw/plugin-sdk/realtime-voice";
|
||||
import { normalizeAgentId } from "openclaw/plugin-sdk/routing";
|
||||
@@ -14,10 +16,30 @@ import type { GoogleMeetConfig, GoogleMeetToolPolicy } from "./config.js";
|
||||
export const GOOGLE_MEET_AGENT_CONSULT_TOOL_NAME = REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME;
|
||||
export const GOOGLE_MEET_AGENT_CONSULT_TOOL = REALTIME_VOICE_AGENT_CONSULT_TOOL;
|
||||
|
||||
/**
 * Extra system prompt handed to the OpenClaw agent when it is consulted on
 * behalf of a live Google Meet voice agent (passed as `extraSystemPrompt`).
 * The sentences are kept as separate array entries for readability and are
 * joined with single spaces into one string.
 */
const GOOGLE_MEET_CONSULT_SYSTEM_PROMPT = [
|
||||
"You are a behind-the-scenes consultant for a live meeting voice agent.",
|
||||
"Prioritize a fast, speakable answer over exhaustive investigation.",
|
||||
"For tool-backed status checks, prefer one or two bounded read-only queries before answering.",
|
||||
"Do not print secret values or dump environment variables; only check whether required configuration is present.",
|
||||
"Be accurate, brief, and speakable.",
|
||||
].join(" ");
|
||||
|
||||
/**
 * Resolves the realtime tools to expose for a Google Meet session.
 *
 * Delegates directly to `resolveRealtimeVoiceAgentConsultTools`, so the
 * returned list is whatever that shared helper produces for the policy.
 *
 * @param policy - Google Meet tool policy to resolve tools for.
 * @returns The realtime voice tools for the given policy.
 */
export function resolveGoogleMeetRealtimeTools(policy: GoogleMeetToolPolicy): RealtimeVoiceTool[] {
|
||||
return resolveRealtimeVoiceAgentConsultTools(policy);
|
||||
}
|
||||
|
||||
/**
 * Sends an interim "working" tool result for an agent consult call so the
 * realtime provider has something to respond with while the consult runs.
 *
 * The interim result is built with
 * `buildRealtimeVoiceAgentConsultWorkingResponse("participant")` and is
 * submitted with `willContinue: true`; the caller is expected to submit the
 * final result for the same `callId` afterwards.
 *
 * @param session - Bridge session the tool call arrived on.
 * @param callId - Identifier of the tool call being answered.
 */
export function submitGoogleMeetConsultWorkingResponse(
|
||||
session: RealtimeVoiceBridgeSession,
|
||||
callId: string,
|
||||
): void {
|
||||
// Bridges that do not advertise continuation support get no interim
|
||||
// response at all; only the final result will be submitted for them.
|
||||
if (!session.bridge.supportsToolResultContinuation) {
|
||||
return;
|
||||
}
|
||||
session.submitToolResult(callId, buildRealtimeVoiceAgentConsultWorkingResponse("participant"), {
|
||||
willContinue: true,
|
||||
});
|
||||
}
|
||||
|
||||
export async function consultOpenClawAgentForGoogleMeet(params: {
|
||||
config: GoogleMeetConfig;
|
||||
fullConfig: OpenClawConfig;
|
||||
@@ -45,7 +67,6 @@ export async function consultOpenClawAgentForGoogleMeet(params: {
|
||||
assistantLabel: "Agent",
|
||||
questionSourceLabel: "participant",
|
||||
toolsAllow: resolveRealtimeVoiceAgentConsultToolsAllow(params.config.realtime.toolPolicy),
|
||||
extraSystemPrompt:
|
||||
"You are a behind-the-scenes consultant for a live meeting voice agent. Be accurate, brief, and speakable.",
|
||||
extraSystemPrompt: GOOGLE_MEET_CONSULT_SYSTEM_PROMPT,
|
||||
});
|
||||
}
|
||||
|
||||
@@ -10,6 +10,7 @@ import {
|
||||
consultOpenClawAgentForGoogleMeet,
|
||||
GOOGLE_MEET_AGENT_CONSULT_TOOL_NAME,
|
||||
resolveGoogleMeetRealtimeTools,
|
||||
submitGoogleMeetConsultWorkingResponse,
|
||||
} from "./agent-consult.js";
|
||||
import type { GoogleMeetConfig } from "./config.js";
|
||||
import { resolveGoogleMeetRealtimeProvider } from "./realtime.js";
|
||||
@@ -157,6 +158,7 @@ export async function startNodeRealtimeAudioBridge(params: {
|
||||
});
|
||||
return;
|
||||
}
|
||||
submitGoogleMeetConsultWorkingResponse(session, event.callId || event.itemId);
|
||||
void consultOpenClawAgentForGoogleMeet({
|
||||
config: params.config,
|
||||
fullConfig: params.fullConfig,
|
||||
|
||||
@@ -14,6 +14,7 @@ import {
|
||||
consultOpenClawAgentForGoogleMeet,
|
||||
GOOGLE_MEET_AGENT_CONSULT_TOOL_NAME,
|
||||
resolveGoogleMeetRealtimeTools,
|
||||
submitGoogleMeetConsultWorkingResponse,
|
||||
} from "./agent-consult.js";
|
||||
import type { GoogleMeetConfig } from "./config.js";
|
||||
import type { GoogleMeetChromeHealth } from "./transports/types.js";
|
||||
@@ -216,6 +217,7 @@ export async function startCommandRealtimeAudioBridge(params: {
|
||||
});
|
||||
return;
|
||||
}
|
||||
submitGoogleMeetConsultWorkingResponse(session, event.callId || event.itemId);
|
||||
void consultOpenClawAgentForGoogleMeet({
|
||||
config: params.config,
|
||||
fullConfig: params.fullConfig,
|
||||
|
||||
@@ -131,6 +131,18 @@ describe("buildGoogleRealtimeVoiceProvider", () => {
|
||||
required: ["query"],
|
||||
},
|
||||
},
|
||||
{
|
||||
type: "function",
|
||||
name: "openclaw_agent_consult",
|
||||
description: "Ask OpenClaw",
|
||||
parameters: {
|
||||
type: "object",
|
||||
properties: {
|
||||
question: { type: "string" },
|
||||
},
|
||||
required: ["question"],
|
||||
},
|
||||
},
|
||||
],
|
||||
onAudio: vi.fn(),
|
||||
onClearAudio: vi.fn(),
|
||||
@@ -175,6 +187,18 @@ describe("buildGoogleRealtimeVoiceProvider", () => {
|
||||
required: ["query"],
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "openclaw_agent_consult",
|
||||
description: "Ask OpenClaw",
|
||||
parametersJsonSchema: {
|
||||
type: "object",
|
||||
properties: {
|
||||
question: { type: "string" },
|
||||
},
|
||||
required: ["question"],
|
||||
},
|
||||
behavior: "NON_BLOCKING",
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
@@ -392,6 +416,55 @@ describe("buildGoogleRealtimeVoiceProvider", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("keeps Google Live consult calls open after continuing tool responses", async () => {
|
||||
const provider = buildGoogleRealtimeVoiceProvider();
|
||||
const bridge = provider.createBridge({
|
||||
providerConfig: { apiKey: "gemini-key" },
|
||||
onAudio: vi.fn(),
|
||||
onClearAudio: vi.fn(),
|
||||
onToolCall: vi.fn(),
|
||||
});
|
||||
|
||||
await bridge.connect();
|
||||
lastConnectParams().callbacks.onmessage({
|
||||
setupComplete: { sessionId: "session-1" },
|
||||
toolCall: {
|
||||
functionCalls: [
|
||||
{ id: "consult-call", name: "openclaw_agent_consult", args: { prompt: "hi" } },
|
||||
],
|
||||
},
|
||||
});
|
||||
|
||||
bridge.submitToolResult(
|
||||
"consult-call",
|
||||
{ status: "working", message: "Tell the participant you are checking." },
|
||||
{ willContinue: true },
|
||||
);
|
||||
bridge.submitToolResult("consult-call", { text: "The meeting starts at 3." });
|
||||
|
||||
expect(session.sendToolResponse).toHaveBeenNthCalledWith(1, {
|
||||
functionResponses: [
|
||||
{
|
||||
id: "consult-call",
|
||||
name: "openclaw_agent_consult",
|
||||
scheduling: "WHEN_IDLE",
|
||||
willContinue: true,
|
||||
response: { status: "working", message: "Tell the participant you are checking." },
|
||||
},
|
||||
],
|
||||
});
|
||||
expect(session.sendToolResponse).toHaveBeenNthCalledWith(2, {
|
||||
functionResponses: [
|
||||
{
|
||||
id: "consult-call",
|
||||
name: "openclaw_agent_consult",
|
||||
scheduling: "WHEN_IDLE",
|
||||
response: { text: "The meeting starts at 3." },
|
||||
},
|
||||
],
|
||||
});
|
||||
});
|
||||
|
||||
it("does not send malformed Live API tool responses without a matching call name", async () => {
|
||||
const provider = buildGoogleRealtimeVoiceProvider();
|
||||
const onError = vi.fn();
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
import { randomUUID } from "node:crypto";
|
||||
import {
|
||||
ActivityHandling,
|
||||
Behavior,
|
||||
EndSensitivity,
|
||||
FunctionResponseScheduling,
|
||||
Modality,
|
||||
StartSensitivity,
|
||||
TurnCoverage,
|
||||
@@ -20,8 +22,14 @@ import type {
|
||||
RealtimeVoiceProviderConfig,
|
||||
RealtimeVoiceProviderPlugin,
|
||||
RealtimeVoiceTool,
|
||||
RealtimeVoiceToolResultOptions,
|
||||
} from "openclaw/plugin-sdk/realtime-voice";
|
||||
import {
|
||||
convertPcmToMulaw8k,
|
||||
mulawToPcm,
|
||||
REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME,
|
||||
resamplePcm,
|
||||
} from "openclaw/plugin-sdk/realtime-voice";
|
||||
import { convertPcmToMulaw8k, mulawToPcm, resamplePcm } from "openclaw/plugin-sdk/realtime-voice";
|
||||
import { normalizeResolvedSecretInputString } from "openclaw/plugin-sdk/secret-input";
|
||||
import { normalizeOptionalString } from "openclaw/plugin-sdk/text-runtime";
|
||||
import { createGoogleGenAI } from "./google-genai-runtime.js";
|
||||
@@ -288,11 +296,17 @@ function buildRealtimeInputConfig(
|
||||
}
|
||||
|
||||
/**
 * Converts OpenClaw realtime voice tools into Google Live function
 * declarations.
 *
 * The agent consult tool is declared with `Behavior.NON_BLOCKING`, matching
 * the continuation flow in which an interim "working" response is followed by
 * the final result. All other tools keep the default behavior.
 *
 * @param tools - Tools to expose to the model; `undefined` yields an empty list.
 * @returns One declaration per tool, in input order.
 */
function buildFunctionDeclarations(tools: RealtimeVoiceTool[] | undefined): FunctionDeclaration[] {
  return (tools ?? []).map((tool) => {
    const declaration: FunctionDeclaration = {
      name: tool.name,
      description: tool.description,
      parametersJsonSchema: tool.parameters,
    };
    // Only the consult tool is long-running enough to need non-blocking
    // handling; tagging it here is what the provider tests expect.
    if (tool.name === REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME) {
      declaration.behavior = Behavior.NON_BLOCKING;
    }
    return declaration;
  });
}
|
||||
|
||||
function parsePcmSampleRate(mimeType: string | undefined): number {
|
||||
@@ -306,6 +320,8 @@ function isMulawSilence(audio: Buffer): boolean {
|
||||
}
|
||||
|
||||
class GoogleRealtimeVoiceBridge implements RealtimeVoiceBridge {
|
||||
readonly supportsToolResultContinuation = true;
|
||||
|
||||
private session: GoogleLiveSession | null = null;
|
||||
private connected = false;
|
||||
private sessionConfigured = false;
|
||||
@@ -448,7 +464,11 @@ class GoogleRealtimeVoiceBridge implements RealtimeVoiceBridge {
|
||||
this.sendUserMessage(greetingPrompt);
|
||||
}
|
||||
|
||||
submitToolResult(callId: string, result: unknown): void {
|
||||
submitToolResult(
|
||||
callId: string,
|
||||
result: unknown,
|
||||
options?: RealtimeVoiceToolResultOptions,
|
||||
): void {
|
||||
if (!this.session) {
|
||||
return;
|
||||
}
|
||||
@@ -462,19 +482,34 @@ class GoogleRealtimeVoiceBridge implements RealtimeVoiceBridge {
|
||||
return;
|
||||
}
|
||||
try {
|
||||
const isConsultTool = name === REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME;
|
||||
const functionResponse: FunctionResponse = {
|
||||
id: callId,
|
||||
name,
|
||||
response:
|
||||
result && typeof result === "object" && !Array.isArray(result)
|
||||
? (result as Record<string, unknown>)
|
||||
: { output: result },
|
||||
};
|
||||
if (isConsultTool) {
|
||||
functionResponse.scheduling = FunctionResponseScheduling.WHEN_IDLE;
|
||||
if (options?.willContinue === true) {
|
||||
functionResponse.willContinue = true;
|
||||
}
|
||||
} else if (options?.willContinue === true) {
|
||||
this.config.onError?.(
|
||||
new Error(
|
||||
`Google Live continuation is only supported for ${REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME}`,
|
||||
),
|
||||
);
|
||||
return;
|
||||
}
|
||||
this.session.sendToolResponse({
|
||||
functionResponses: [
|
||||
{
|
||||
id: callId,
|
||||
name,
|
||||
response:
|
||||
result && typeof result === "object" && !Array.isArray(result)
|
||||
? (result as Record<string, unknown>)
|
||||
: { output: result },
|
||||
},
|
||||
],
|
||||
functionResponses: [functionResponse],
|
||||
});
|
||||
this.pendingFunctionNames.delete(callId);
|
||||
if (options?.willContinue !== true) {
|
||||
this.pendingFunctionNames.delete(callId);
|
||||
}
|
||||
} catch (error) {
|
||||
this.config.onError?.(
|
||||
error instanceof Error ? error : new Error("Failed to send Google Live function response"),
|
||||
|
||||
@@ -326,6 +326,7 @@ describe("createVoiceCallRuntime lifecycle", () => {
|
||||
provider: "openai",
|
||||
model: "gpt-5.4",
|
||||
toolsAllow: ["read", "web_search", "web_fetch", "x_search", "memory_search", "memory_get"],
|
||||
extraSystemPrompt: expect.stringContaining("one or two bounded read-only queries"),
|
||||
prompt: expect.stringContaining("Caller: Can you check shipment status?"),
|
||||
}),
|
||||
);
|
||||
|
||||
@@ -47,6 +47,14 @@ type MockProviderModule = typeof import("./providers/mock.js");
|
||||
type RealtimeVoiceRuntimeModule = typeof import("./realtime-voice.runtime.js");
|
||||
type RealtimeHandlerModule = typeof import("./webhook/realtime-handler.js");
|
||||
|
||||
/**
 * Extra system prompt handed to the OpenClaw agent when it is consulted on
 * behalf of a live phone voice agent (passed as `extraSystemPrompt`).
 * The sentences are kept as separate array entries for readability and are
 * joined with single spaces into one string.
 */
const REALTIME_VOICE_CONSULT_SYSTEM_PROMPT = [
|
||||
"You are a behind-the-scenes consultant for a live phone voice agent.",
|
||||
"Prioritize a fast, speakable answer over exhaustive investigation.",
|
||||
"For tool-backed status checks, prefer one or two bounded read-only queries before answering.",
|
||||
"Do not print secret values or dump environment variables; only check whether required configuration is present.",
|
||||
"Be accurate, brief, and speakable.",
|
||||
].join(" ");
|
||||
|
||||
let telnyxProviderPromise: Promise<TelnyxProviderModule> | undefined;
|
||||
let twilioProviderPromise: Promise<TwilioProviderModule> | undefined;
|
||||
let plivoProviderPromise: Promise<PlivoProviderModule> | undefined;
|
||||
@@ -368,8 +376,7 @@ export async function createVoiceCallRuntime(params: {
|
||||
thinkLevel,
|
||||
timeoutMs: config.responseTimeoutMs,
|
||||
toolsAllow: resolveRealtimeVoiceAgentConsultToolsAllow(config.realtime.toolPolicy),
|
||||
extraSystemPrompt:
|
||||
"You are a behind-the-scenes consultant for a live phone voice agent. Be accurate, brief, and speakable.",
|
||||
extraSystemPrompt: REALTIME_VOICE_CONSULT_SYSTEM_PROMPT,
|
||||
});
|
||||
},
|
||||
);
|
||||
|
||||
@@ -20,16 +20,17 @@ function makeRequest(url: string, host = "gateway.ts.net"): http.IncomingMessage
|
||||
return req;
|
||||
}
|
||||
|
||||
/**
 * Builds a stub `RealtimeVoiceBridge` for handler tests.
 *
 * Every method is a no-op except `isConnected`, which reports `true`, and
 * `submitToolResult`, which is a `vi.fn()` so tests can assert on the
 * submitted results. Entries in `overrides` replace the defaults.
 *
 * @param overrides - Bridge members to substitute for the stub defaults.
 * @returns A bridge object suitable for returning from a mocked `createBridge`.
 */
function makeBridge(overrides: Partial<RealtimeVoiceBridge> = {}): RealtimeVoiceBridge {
  return {
    connect: async () => {},
    sendAudio: () => {},
    setMediaTimestamp: () => {},
    submitToolResult: vi.fn(),
    acknowledgeMark: () => {},
    close: () => {},
    isConnected: () => true,
    triggerGreeting: () => {},
    // Spread last so a test-supplied member always wins over the default.
    ...overrides,
  };
}
|
||||
|
||||
@@ -212,6 +213,128 @@ describe("RealtimeCallHandler path routing", () => {
|
||||
await server.close();
|
||||
}
|
||||
});
|
||||
|
||||
it("submits continuing responses only for realtime agent consult calls", async () => {
|
||||
let callbacks:
|
||||
| {
|
||||
onToolCall?: (event: {
|
||||
itemId: string;
|
||||
callId: string;
|
||||
name: string;
|
||||
args: unknown;
|
||||
}) => void;
|
||||
}
|
||||
| undefined;
|
||||
let resolveConsult: ((value: unknown) => void) | undefined;
|
||||
const submitToolResult = vi.fn();
|
||||
const bridge = makeBridge({
|
||||
supportsToolResultContinuation: true,
|
||||
submitToolResult,
|
||||
});
|
||||
const createBridge = vi.fn(
|
||||
(request: Parameters<RealtimeVoiceProviderPlugin["createBridge"]>[0]) => {
|
||||
callbacks = request;
|
||||
return bridge;
|
||||
},
|
||||
);
|
||||
const getCallByProviderCallId = vi.fn(
|
||||
(): CallRecord => ({
|
||||
callId: "call-1",
|
||||
providerCallId: "CA-tool",
|
||||
provider: "twilio",
|
||||
direction: "inbound",
|
||||
state: "ringing",
|
||||
from: "+15550001234",
|
||||
to: "+15550009999",
|
||||
startedAt: Date.now(),
|
||||
transcript: [],
|
||||
processedEventIds: [],
|
||||
metadata: {},
|
||||
}),
|
||||
);
|
||||
const handler = makeHandler(undefined, {
|
||||
manager: {
|
||||
getCallByProviderCallId,
|
||||
},
|
||||
realtimeProvider: makeRealtimeProvider(createBridge),
|
||||
});
|
||||
handler.registerToolHandler(
|
||||
"openclaw_agent_consult",
|
||||
() =>
|
||||
new Promise((resolve) => {
|
||||
resolveConsult = resolve;
|
||||
}),
|
||||
);
|
||||
handler.registerToolHandler("custom_lookup", async () => ({ ok: true }));
|
||||
const server = await startRealtimeServer(handler);
|
||||
|
||||
try {
|
||||
const ws = await connectWs(server.url);
|
||||
try {
|
||||
ws.send(
|
||||
JSON.stringify({
|
||||
event: "start",
|
||||
start: { streamSid: "MZ-tool", callSid: "CA-tool" },
|
||||
}),
|
||||
);
|
||||
await vi.waitFor(() => {
|
||||
expect(createBridge).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
callbacks?.onToolCall?.({
|
||||
itemId: "item-1",
|
||||
callId: "consult-call",
|
||||
name: "openclaw_agent_consult",
|
||||
args: { question: "Are the basement lights on?" },
|
||||
});
|
||||
|
||||
await vi.waitFor(() => {
|
||||
expect(submitToolResult).toHaveBeenCalledWith(
|
||||
"consult-call",
|
||||
expect.objectContaining({
|
||||
status: "working",
|
||||
tool: "openclaw_agent_consult",
|
||||
}),
|
||||
{ willContinue: true },
|
||||
);
|
||||
});
|
||||
expect(submitToolResult).toHaveBeenCalledTimes(1);
|
||||
|
||||
resolveConsult?.({ text: "The basement lights are on." });
|
||||
|
||||
await vi.waitFor(() => {
|
||||
expect(submitToolResult).toHaveBeenLastCalledWith(
|
||||
"consult-call",
|
||||
{
|
||||
text: "The basement lights are on.",
|
||||
},
|
||||
undefined,
|
||||
);
|
||||
});
|
||||
|
||||
submitToolResult.mockClear();
|
||||
callbacks?.onToolCall?.({
|
||||
itemId: "item-2",
|
||||
callId: "custom-call",
|
||||
name: "custom_lookup",
|
||||
args: {},
|
||||
});
|
||||
|
||||
await vi.waitFor(() => {
|
||||
expect(submitToolResult).toHaveBeenCalledWith("custom-call", { ok: true }, undefined);
|
||||
});
|
||||
expect(submitToolResult).not.toHaveBeenCalledWith("custom-call", expect.anything(), {
|
||||
willContinue: true,
|
||||
});
|
||||
} finally {
|
||||
if (ws.readyState !== WebSocket.CLOSED && ws.readyState !== WebSocket.CLOSING) {
|
||||
ws.close();
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
await server.close();
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
describe("RealtimeCallHandler websocket hardening", () => {
|
||||
|
||||
@@ -3,7 +3,9 @@ import http from "node:http";
|
||||
import type { Duplex } from "node:stream";
|
||||
import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime";
|
||||
import {
|
||||
buildRealtimeVoiceAgentConsultWorkingResponse,
|
||||
createRealtimeVoiceBridgeSession,
|
||||
REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME,
|
||||
type RealtimeVoiceBridgeSession,
|
||||
type RealtimeVoiceProviderConfig,
|
||||
type RealtimeVoiceProviderPlugin,
|
||||
@@ -410,6 +412,17 @@ export class RealtimeCallHandler {
|
||||
args: unknown,
|
||||
): Promise<void> {
|
||||
const handler = this.toolHandlers.get(name);
|
||||
if (
|
||||
handler &&
|
||||
name === REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME &&
|
||||
bridge.bridge.supportsToolResultContinuation
|
||||
) {
|
||||
bridge.submitToolResult(
|
||||
bridgeCallId,
|
||||
buildRealtimeVoiceAgentConsultWorkingResponse("caller"),
|
||||
{ willContinue: true },
|
||||
);
|
||||
}
|
||||
const result = !handler
|
||||
? { error: `Tool "${name}" not available` }
|
||||
: await handler(args, callId).catch((error: unknown) => ({
|
||||
|
||||
@@ -13,10 +13,12 @@ export type {
|
||||
RealtimeVoiceRole,
|
||||
RealtimeVoiceTool,
|
||||
RealtimeVoiceToolCallEvent,
|
||||
RealtimeVoiceToolResultOptions,
|
||||
} from "../realtime-voice/provider-types.js";
|
||||
export {
|
||||
buildRealtimeVoiceAgentConsultChatMessage,
|
||||
buildRealtimeVoiceAgentConsultPrompt,
|
||||
buildRealtimeVoiceAgentConsultWorkingResponse,
|
||||
collectRealtimeVoiceAgentConsultVisibleText,
|
||||
isRealtimeVoiceAgentConsultToolPolicy,
|
||||
parseRealtimeVoiceAgentConsultArgs,
|
||||
|
||||
@@ -47,6 +47,16 @@ export const REALTIME_VOICE_AGENT_CONSULT_TOOL: RealtimeVoiceTool = {
|
||||
},
|
||||
};
|
||||
|
||||
/**
 * Builds the interim tool result sent while an agent consult is still
 * running.
 *
 * The payload carries `status: "working"`, the consult tool name, and a
 * `message` instructing the realtime model to say briefly that it is
 * checking and then wait for the final OpenClaw result.
 *
 * @param audienceLabel - Noun used in the message for whoever is being
 *   spoken to (callers in this change pass "participant" or "caller");
 *   defaults to "person".
 * @returns A plain object to submit as the interim tool result.
 */
export function buildRealtimeVoiceAgentConsultWorkingResponse(
|
||||
audienceLabel = "person",
|
||||
): Record<string, unknown> {
|
||||
return {
|
||||
status: "working",
|
||||
tool: REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME,
|
||||
message: `Tell the ${audienceLabel} briefly that you are checking, then wait for the final OpenClaw result before answering with the actual result.`,
|
||||
};
|
||||
}
|
||||
|
||||
const SAFE_READ_ONLY_TOOLS = [
|
||||
"read",
|
||||
"web_search",
|
||||
|
||||
@@ -24,6 +24,10 @@ export type RealtimeVoiceToolCallEvent = {
|
||||
args: unknown;
|
||||
};
|
||||
|
||||
/**
 * Options that accompany a tool result submitted to a realtime voice bridge.
 */
export type RealtimeVoiceToolResultOptions = {
|
||||
// When true, this result is an interim response and a further result for
|
||||
// the same call id will follow. Omitted or false means the result is final.
|
||||
willContinue?: boolean;
|
||||
};
|
||||
|
||||
export type RealtimeVoiceBridgeCallbacks = {
|
||||
onAudio: (muLaw: Buffer) => void;
|
||||
onClearAudio: () => void;
|
||||
@@ -70,12 +74,13 @@ export type RealtimeVoiceBrowserSession = {
|
||||
};
|
||||
|
||||
export type RealtimeVoiceBridge = {
|
||||
supportsToolResultContinuation?: boolean;
|
||||
connect(): Promise<void>;
|
||||
sendAudio(audio: Buffer): void;
|
||||
setMediaTimestamp(ts: number): void;
|
||||
sendUserMessage?(text: string): void;
|
||||
triggerGreeting?(instructions?: string): void;
|
||||
submitToolResult(callId: string, result: unknown): void;
|
||||
submitToolResult(callId: string, result: unknown, options?: RealtimeVoiceToolResultOptions): void;
|
||||
acknowledgeMark(): void;
|
||||
close(): void;
|
||||
isConnected(): boolean;
|
||||
|
||||
@@ -144,6 +144,29 @@ describe("realtime voice bridge session runtime", () => {
|
||||
expect(onToolCall).toHaveBeenCalledWith(event, session);
|
||||
});
|
||||
|
||||
it("forwards tool result continuation options to the provider bridge", () => {
|
||||
const bridge = makeBridge();
|
||||
const provider: RealtimeVoiceProviderPlugin = {
|
||||
id: "test",
|
||||
label: "Test",
|
||||
isConfigured: () => true,
|
||||
createBridge: () => bridge,
|
||||
};
|
||||
const session = createRealtimeVoiceBridgeSession({
|
||||
provider,
|
||||
providerConfig: {},
|
||||
audioSink: { sendAudio: vi.fn() },
|
||||
});
|
||||
|
||||
session.submitToolResult("call-1", { status: "working" }, { willContinue: true });
|
||||
|
||||
expect(bridge.submitToolResult).toHaveBeenCalledWith(
|
||||
"call-1",
|
||||
{ status: "working" },
|
||||
{ willContinue: true },
|
||||
);
|
||||
});
|
||||
|
||||
it("does not expose session callbacks until the provider returns its bridge", () => {
|
||||
let callbacks: Parameters<RealtimeVoiceProviderPlugin["createBridge"]>[0] | undefined;
|
||||
const bridge = makeBridge();
|
||||
|
||||
@@ -6,6 +6,7 @@ import type {
|
||||
RealtimeVoiceRole,
|
||||
RealtimeVoiceTool,
|
||||
RealtimeVoiceToolCallEvent,
|
||||
RealtimeVoiceToolResultOptions,
|
||||
} from "./provider-types.js";
|
||||
|
||||
export type RealtimeVoiceAudioSink = {
|
||||
@@ -25,7 +26,7 @@ export type RealtimeVoiceBridgeSession = {
|
||||
sendAudio(audio: Buffer): void;
|
||||
sendUserMessage(text: string): void;
|
||||
setMediaTimestamp(ts: number): void;
|
||||
submitToolResult(callId: string, result: unknown): void;
|
||||
submitToolResult(callId: string, result: unknown, options?: RealtimeVoiceToolResultOptions): void;
|
||||
triggerGreeting(instructions?: string): void;
|
||||
};
|
||||
|
||||
@@ -65,7 +66,8 @@ export function createRealtimeVoiceBridgeSession(
|
||||
sendAudio: (audio) => requireBridge().sendAudio(audio),
|
||||
sendUserMessage: (text) => requireBridge().sendUserMessage?.(text),
|
||||
setMediaTimestamp: (ts) => requireBridge().setMediaTimestamp(ts),
|
||||
submitToolResult: (callId, result) => requireBridge().submitToolResult(callId, result),
|
||||
submitToolResult: (callId, result, options) =>
|
||||
requireBridge().submitToolResult(callId, result, options),
|
||||
triggerGreeting: (instructions) => requireBridge().triggerGreeting?.(instructions),
|
||||
};
|
||||
const canSendAudio = () => params.audioSink.isOpen?.() ?? true;
|
||||
|
||||
Reference in New Issue
Block a user