fix: improve google meet twilio join sequencing

This commit is contained in:
Peter Steinberger
2026-05-02 10:56:08 +01:00
parent 59fb9e5ca7
commit 1634f91a35
7 changed files with 204 additions and 25 deletions

View File

@@ -491,7 +491,7 @@ export class GoogleMeetRuntime {
session.notes.push(
this.params.config.voiceCall.enabled
? dtmfSequence
? "Twilio transport delegated the call to the voice-call plugin and queued configured DTMF."
? "Twilio transport delegated the phone leg to the voice-call plugin, then sent configured DTMF after connect before speaking."
: "Twilio transport delegated the call to the voice-call plugin without configured DTMF."
: "Twilio transport is an explicit dial plan; voice-call delegation is disabled.",
);

View File

@@ -21,39 +21,59 @@ vi.mock("openclaw/plugin-sdk/gateway-runtime", () => ({
describe("Google Meet voice-call gateway", () => {
beforeEach(() => {
vi.useRealTimers();
gatewayMocks.request.mockReset();
gatewayMocks.request.mockResolvedValue({ callId: "call-1" });
gatewayMocks.stopAndWait.mockClear();
gatewayMocks.startGatewayClientWhenEventLoopReady.mockClear();
});
it("starts Twilio Meet calls with pre-connect DTMF and intro metadata", async () => {
it("starts Twilio Meet calls, sends delayed DTMF, then speaks the intro", async () => {
const config = resolveGoogleMeetConfig({
voiceCall: {
gatewayUrl: "ws://127.0.0.1:18789",
dtmfDelayMs: 1,
postDtmfSpeechDelayMs: 2,
},
realtime: { introMessage: "Say exactly: I'm here and listening." },
});
await joinMeetViaVoiceCallGateway({
const join = joinMeetViaVoiceCallGateway({
config,
dialInNumber: "+15551234567",
dtmfSequence: "123456#",
message: "Say exactly: I'm here and listening.",
});
await join;
expect(gatewayMocks.request).toHaveBeenNthCalledWith(
1,
"voicecall.start",
{
to: "+15551234567",
mode: "conversation",
message: "Say exactly: I'm here and listening.",
dtmfSequence: "123456#",
},
{ timeoutMs: 30_000 },
);
expect(gatewayMocks.request).toHaveBeenCalledTimes(1);
expect(gatewayMocks.request).toHaveBeenNthCalledWith(
2,
"voicecall.dtmf",
{
callId: "call-1",
digits: "123456#",
},
{ timeoutMs: 30_000 },
);
expect(gatewayMocks.request).toHaveBeenNthCalledWith(
3,
"voicecall.speak",
{
callId: "call-1",
message: "Say exactly: I'm here and listening.",
},
{ timeoutMs: 30_000 },
);
expect(gatewayMocks.request).toHaveBeenCalledTimes(3);
});
});

View File

@@ -18,12 +18,24 @@ type VoiceCallSpeakResult = {
error?: string;
};
/**
 * Shape the `voicecall.dtmf` gateway response is cast to.
 *
 * The join flow only treats an explicit `success === false` as a failure; a
 * missing `success` field is accepted. When present, `error` is used as the
 * message of the thrown error.
 */
type VoiceCallDtmfResult = {
success?: boolean;
error?: string;
};
/**
 * Summary of a Meet join performed through the Voice Call gateway
 * (presumably the return shape of `joinMeetViaVoiceCallGateway` — confirm
 * against its signature).
 */
type VoiceCallMeetJoinResult = {
// Call id reported by the `voicecall.start` response.
callId: string;
// Whether Meet DTMF digits were sent for this call.
dtmfSent: boolean;
// Whether the intro message was requested for this call.
introSent: boolean;
};
/**
 * Returns a promise that settles after `ms` milliseconds.
 *
 * Zero or negative delays resolve immediately without scheduling a timer.
 */
function sleep(ms: number): Promise<void> {
  return ms <= 0
    ? Promise.resolve()
    : new Promise((wake) => setTimeout(wake, ms));
}
async function createConnectedGatewayClient(
config: GoogleMeetConfig,
): Promise<VoiceCallGatewayClient> {
@@ -81,15 +93,13 @@ export async function joinMeetViaVoiceCallGateway(params: {
try {
client = await createConnectedGatewayClient(params.config);
params.logger?.info(
`[google-meet] Delegating Twilio join to Voice Call (dtmf=${params.dtmfSequence ? "yes" : "no"}, intro=${params.message ? "yes" : "no"})`,
`[google-meet] Delegating Twilio join to Voice Call (dtmf=${params.dtmfSequence ? "post-connect" : "none"}, intro=${params.message ? "delayed" : "none"})`,
);
const start = (await client.request(
"voicecall.start",
{
to: params.dialInNumber,
mode: "conversation",
...(params.message ? { message: params.message } : {}),
...(params.dtmfSequence ? { dtmfSequence: params.dtmfSequence } : {}),
},
{ timeoutMs: params.config.voiceCall.requestTimeoutMs },
)) as VoiceCallStartResult;
@@ -97,12 +107,60 @@ export async function joinMeetViaVoiceCallGateway(params: {
throw new Error(start.error || "voicecall.start did not return callId");
}
params.logger?.info(
`[google-meet] Voice Call Twilio join started: callId=${start.callId} dtmf=${params.dtmfSequence ? "yes" : "no"} intro=${params.message ? "yes" : "no"}`,
`[google-meet] Voice Call Twilio phone leg started: callId=${start.callId}`,
);
let dtmfSent = false;
if (params.dtmfSequence) {
const delayMs = params.config.voiceCall.dtmfDelayMs;
params.logger?.info(
`[google-meet] Waiting ${delayMs}ms before sending Meet DTMF for callId=${start.callId}`,
);
await sleep(delayMs);
const dtmf = (await client.request(
"voicecall.dtmf",
{
callId: start.callId,
digits: params.dtmfSequence,
},
{ timeoutMs: params.config.voiceCall.requestTimeoutMs },
)) as VoiceCallDtmfResult;
if (dtmf.success === false) {
throw new Error(dtmf.error || "voicecall.dtmf failed");
}
dtmfSent = true;
params.logger?.info(
`[google-meet] Meet DTMF sent after phone leg connected: callId=${start.callId} digits=${params.dtmfSequence.length}`,
);
}
let introSent = false;
if (params.message) {
const delayMs = params.dtmfSequence ? params.config.voiceCall.postDtmfSpeechDelayMs : 0;
if (delayMs > 0) {
params.logger?.info(
`[google-meet] Waiting ${delayMs}ms after Meet DTMF before speaking intro for callId=${start.callId}`,
);
await sleep(delayMs);
}
const spoken = (await client.request(
"voicecall.speak",
{
callId: start.callId,
message: params.message,
},
{ timeoutMs: params.config.voiceCall.requestTimeoutMs },
)) as VoiceCallSpeakResult;
if (spoken.success === false) {
throw new Error(spoken.error || "voicecall.speak failed");
}
introSent = true;
params.logger?.info(
`[google-meet] Intro speech requested after Meet dial sequence: callId=${start.callId}`,
);
}
return {
callId: start.callId,
dtmfSent: Boolean(params.dtmfSequence),
introSent: Boolean(params.message),
dtmfSent,
introSent,
};
} finally {
await client?.stopAndWait({ timeoutMs: 1_000 });

View File

@@ -6,6 +6,7 @@ import { createTestPluginApi } from "openclaw/plugin-sdk/plugin-test-api";
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import type { OpenClawPluginApi } from "./api.js";
import type { VoiceCallRuntime } from "./runtime-entry.js";
import type { CallRecord } from "./src/types.js";
let runtimeStub: VoiceCallRuntime;
@@ -52,8 +53,12 @@ function captureStdout() {
}
function createRuntimeStub(callId = "call-1"): VoiceCallRuntime {
const call = createCallRecord({ callId });
return {
config: { toNumber: "+15550001234" } as VoiceCallRuntime["config"],
config: {
toNumber: "+15550001234",
realtime: { enabled: false },
} as VoiceCallRuntime["config"],
provider: {} as VoiceCallRuntime["provider"],
manager: {
initiateCall: vi.fn(async () => ({ callId, success: true })),
@@ -64,17 +69,35 @@ function createRuntimeStub(callId = "call-1"): VoiceCallRuntime {
speak: vi.fn(async () => ({ success: true })),
sendDtmf: vi.fn(async () => ({ success: true })),
endCall: vi.fn(async () => ({ success: true })),
getCall: vi.fn((id: string) => (id === callId ? { callId } : undefined)),
getCall: vi.fn((id: string) => (id === callId ? call : undefined)),
getCallByProviderCallId: vi.fn(() => undefined),
getActiveCalls: vi.fn(() => [{ callId }]),
getActiveCalls: vi.fn(() => [call]),
getCallHistory: vi.fn(async () => []),
} as unknown as VoiceCallRuntime["manager"],
webhookServer: {} as VoiceCallRuntime["webhookServer"],
webhookServer: {
speakRealtime: vi.fn(() => ({ success: false, error: "No active realtime bridge for call" })),
} as unknown as VoiceCallRuntime["webhookServer"],
webhookUrl: "http://127.0.0.1:3334/voice/webhook",
publicUrl: null,
stop: vi.fn(async () => {}),
};
}
/**
 * Builds a `CallRecord` test fixture.
 *
 * Starts from a fixed outbound, active "mock" provider call and lets the
 * caller replace or add any field through `overrides`. Every invocation
 * produces fresh `transcript` and `processedEventIds` arrays.
 */
function createCallRecord(overrides: Partial<CallRecord> = {}): CallRecord {
  const fixtureDefaults: CallRecord = {
    callId: "call-1",
    provider: "mock",
    direction: "outbound",
    state: "active",
    from: "+15550001111",
    to: "+15550001234",
    startedAt: Date.UTC(2026, 4, 2, 9, 0, 0),
    transcript: [],
    processedEventIds: [],
  };
  // Overrides are spread last so they win over the defaults.
  return { ...fixtureDefaults, ...overrides };
}
function createServiceContext(): Parameters<NonNullable<Registered["service"]>["start"]>[0] {
return {
config: {},
@@ -397,6 +420,60 @@ describe("voice-call plugin", () => {
expect(respond.mock.calls[0]).toEqual([true, { success: true }]);
});
it("normalizes provider call ids before speaking", async () => {
  type SpeakHandler = (ctx: {
    params: Record<string, unknown>;
    respond: ReturnType<typeof vi.fn>;
  }) => Promise<void>;

  // The internal-id lookup misses, so only the provider-call-id lookup can resolve the call.
  runtimeStub.manager.getCall = vi.fn(() => undefined);
  runtimeStub.manager.getCallByProviderCallId = vi.fn(() =>
    createCallRecord({ callId: "call-1", providerCallId: "CA123" }),
  );

  const { methods } = setup({ provider: "mock" });
  const speakHandler = methods.get("voicecall.speak") as SpeakHandler | undefined;
  const reply = vi.fn();

  await speakHandler?.({ params: { callId: "CA123", message: "hello" }, respond: reply });

  // The manager must be addressed by the internal call id, not the provider id the caller sent.
  expect(runtimeStub.manager.speak).toHaveBeenCalledWith("call-1", "hello");
  expect(reply.mock.calls[0]).toEqual([true, { success: true }]);
});
it("reports ended call history when speaking to a stale call", async () => {
  type SpeakHandler = (ctx: {
    params: Record<string, unknown>;
    respond: ReturnType<typeof vi.fn>;
  }) => Promise<void>;

  // Neither active-call lookup finds the call; it is only present in the call history.
  runtimeStub.manager.getCall = vi.fn(() => undefined);
  runtimeStub.manager.getCallByProviderCallId = vi.fn(() => undefined);
  runtimeStub.manager.getCallHistory = vi.fn(async () => [
    createCallRecord({
      callId: "call-1",
      providerCallId: "CA123",
      state: "completed",
      endReason: "completed",
      endedAt: Date.UTC(2026, 4, 2, 9, 18, 23),
    }),
  ]);

  const { methods } = setup({ provider: "mock" });
  const speakHandler = methods.get("voicecall.speak") as SpeakHandler | undefined;
  const reply = vi.fn();

  await speakHandler?.({ params: { callId: "CA123", message: "hello" }, respond: reply });

  // respond(ok, payload, error): the payload slot is irrelevant for this failure case.
  const firstReply = reply.mock.calls[0] ?? [];
  const ok = firstReply[0];
  const error = firstReply[2];
  expect(ok).toBe(false);
  expect(error.message).toContain("call is not active");
  expect(error.message).toContain("last state=completed");
  expect(error.message).toContain("endReason=completed");
  expect(runtimeStub.manager.speak).not.toHaveBeenCalled();
});
it("normalizes legacy config through runtime creation and warns to run doctor", async () => {
const { methods } = setup({
enabled: true,

View File

@@ -302,6 +302,22 @@ export default definePluginEntry({
respondError(respond, formatErrorMessage(err));
};
/**
 * Describes a call that is no longer active, based on the call history.
 *
 * Fetches up to 100 history entries from the call manager and picks the last
 * entry whose `callId` or `providerCallId` equals `callId` (presumably the
 * most recent one — confirm the history ordering).
 *
 * @returns `"call is not active (...)"` with the last state and, when set,
 * the end reason and ISO end timestamp; `undefined` when no entry matches.
 */
const describeHistoricalCall = async (rt: VoiceCallRuntime, callId: string) => {
  const records = await rt.manager.getCallHistory(100);
  const latest = records.findLast(
    (entry) => entry.callId === callId || entry.providerCallId === callId,
  );
  if (!latest) {
    return undefined;
  }
  const parts = [`last state=${latest.state}`];
  if (latest.endReason) {
    parts.push(`endReason=${latest.endReason}`);
  }
  if (latest.endedAt) {
    parts.push(`endedAt=${new Date(latest.endedAt).toISOString()}`);
  }
  return `call is not active (${parts.join(", ")})`;
};
const resolveCallMessageRequest = async (params: GatewayRequestHandlerOptions["params"]) => {
const callId = normalizeOptionalString(params?.callId) ?? "";
const message = normalizeOptionalString(params?.message) ?? "";
@@ -309,7 +325,11 @@ export default definePluginEntry({
return { error: "callId and message required" } as const;
}
const rt = await ensureRuntime();
return { rt, callId, message } as const;
const activeCall = rt.manager.getCall(callId) ?? rt.manager.getCallByProviderCallId(callId);
if (activeCall) {
return { rt, callId: activeCall.callId, message } as const;
}
return { error: (await describeHistoricalCall(rt, callId)) ?? "Call not found" } as const;
};
const initiateCallAndRespond = async (params: {