mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 05:40:44 +00:00
fix: improve google meet twilio join sequencing
This commit is contained in:
@@ -50,6 +50,8 @@ Docs: https://docs.openclaw.ai
|
||||
- Gateway/pricing: abort in-flight model pricing catalog fetches when Gateway shutdown stops the refresh loop, and avoid post-stop cache writes or refresh timers. Fixes #72208. Thanks @rzcq.
|
||||
- Codex/app-server: make startup retry cleanup ownership-aware so concurrent Codex lanes cannot close another lane's freshly restarted shared app-server client. Thanks @vincentkoc.
|
||||
- Google Meet/Twilio: report missing dial-in details during setup and explain that Twilio cannot join Meet URLs without a phone dial plan.
|
||||
- Google Meet/Twilio: start the phone leg before sending Meet PIN DTMF, delay intro speech until after the post-connect dial sequence, and log each stage so operators can tell Twilio-leg audio from Meet-room audio.
|
||||
- Voice Call: accept provider call IDs for gateway speak/continue requests and report ended-call state from history instead of returning a generic "Call not found" for stale calls.
|
||||
- Control UI/Talk: allow the OpenAI Realtime WebRTC offer endpoint through the Control UI CSP, configure browser sessions with explicit VAD/transcription input settings, and surface OpenAI realtime error/lifecycle events instead of leaving Talk stuck as live with no diagnostic. Fixes #73427.
|
||||
- Plugins: clarify config-selected duplicate plugin override diagnostics and document manifest schema updates for bundled-plugin forks. Fixes #8582. Thanks @sachah.
|
||||
- CLI backends/Claude: make live-session JSONL turn caps bounded and configurable via `reliability.outputLimits`, raising the default guard for tool-heavy Claude CLI turns while preserving memory limits. Fixes #75838. Thanks @hcordoba840.
|
||||
|
||||
@@ -1548,19 +1548,21 @@ participant:
|
||||
- Run `openclaw voicecall tail` and check that Twilio webhooks are arriving at
|
||||
the Gateway.
|
||||
- Run `openclaw logs --follow` and look for the Twilio Meet sequence: Google
|
||||
Meet delegates the join, Voice Call stores pre-connect DTMF TwiML, serves
|
||||
that initial TwiML, then serves realtime TwiML and starts the realtime bridge
|
||||
with `initialGreeting=queued`.
|
||||
Meet delegates the join, Voice Call starts the phone leg, Google Meet waits
|
||||
`voiceCall.dtmfDelayMs`, sends DTMF with `voicecall.dtmf`, waits
|
||||
`voiceCall.postDtmfSpeechDelayMs`, then requests intro speech with
|
||||
`voicecall.speak`.
|
||||
- Re-run `openclaw googlemeet setup --transport twilio`; a green setup check is
|
||||
required but does not prove the meeting PIN sequence is correct.
|
||||
- Confirm the dial-in number belongs to the same Meet invitation and region as
|
||||
the PIN.
|
||||
- Increase the leading pauses in `--dtmf-sequence` if Meet answers slowly, for
|
||||
example `wwww123456#`.
|
||||
- Increase `voiceCall.dtmfDelayMs` if Meet answers slowly or the call transcript
|
||||
still shows the prompt asking for a PIN after DTMF was sent.
|
||||
- If the participant joins but you do not hear the greeting, check
|
||||
`openclaw logs --follow` for realtime TwiML, realtime bridge startup, and
|
||||
`initialGreeting=queued`. The greeting is generated from the initial
|
||||
`voicecall.start` message after the realtime bridge connects.
|
||||
`openclaw logs --follow` for the post-DTMF `voicecall.speak` request and
|
||||
either media-stream TTS playback or the Twilio `<Say>` fallback. If the call
|
||||
transcript still contains "enter the meeting PIN", the phone leg has not joined
|
||||
the Meet room yet, so meeting participants will not hear speech.
|
||||
|
||||
If webhooks do not arrive, debug the Voice Call plugin first: the provider must
|
||||
reach `plugins.entries.voice-call.config.publicUrl` or the configured tunnel.
|
||||
|
||||
@@ -491,7 +491,7 @@ export class GoogleMeetRuntime {
|
||||
session.notes.push(
|
||||
this.params.config.voiceCall.enabled
|
||||
? dtmfSequence
|
||||
? "Twilio transport delegated the call to the voice-call plugin and queued configured DTMF."
|
||||
? "Twilio transport delegated the phone leg to the voice-call plugin, then sent configured DTMF after connect before speaking."
|
||||
: "Twilio transport delegated the call to the voice-call plugin without configured DTMF."
|
||||
: "Twilio transport is an explicit dial plan; voice-call delegation is disabled.",
|
||||
);
|
||||
|
||||
@@ -21,39 +21,59 @@ vi.mock("openclaw/plugin-sdk/gateway-runtime", () => ({
|
||||
|
||||
describe("Google Meet voice-call gateway", () => {
|
||||
beforeEach(() => {
|
||||
vi.useRealTimers();
|
||||
gatewayMocks.request.mockReset();
|
||||
gatewayMocks.request.mockResolvedValue({ callId: "call-1" });
|
||||
gatewayMocks.stopAndWait.mockClear();
|
||||
gatewayMocks.startGatewayClientWhenEventLoopReady.mockClear();
|
||||
});
|
||||
|
||||
it("starts Twilio Meet calls with pre-connect DTMF and intro metadata", async () => {
|
||||
it("starts Twilio Meet calls, sends delayed DTMF, then speaks the intro", async () => {
|
||||
const config = resolveGoogleMeetConfig({
|
||||
voiceCall: {
|
||||
gatewayUrl: "ws://127.0.0.1:18789",
|
||||
dtmfDelayMs: 1,
|
||||
postDtmfSpeechDelayMs: 2,
|
||||
},
|
||||
realtime: { introMessage: "Say exactly: I'm here and listening." },
|
||||
});
|
||||
|
||||
await joinMeetViaVoiceCallGateway({
|
||||
const join = joinMeetViaVoiceCallGateway({
|
||||
config,
|
||||
dialInNumber: "+15551234567",
|
||||
dtmfSequence: "123456#",
|
||||
message: "Say exactly: I'm here and listening.",
|
||||
});
|
||||
|
||||
await join;
|
||||
|
||||
expect(gatewayMocks.request).toHaveBeenNthCalledWith(
|
||||
1,
|
||||
"voicecall.start",
|
||||
{
|
||||
to: "+15551234567",
|
||||
mode: "conversation",
|
||||
message: "Say exactly: I'm here and listening.",
|
||||
dtmfSequence: "123456#",
|
||||
},
|
||||
{ timeoutMs: 30_000 },
|
||||
);
|
||||
expect(gatewayMocks.request).toHaveBeenCalledTimes(1);
|
||||
expect(gatewayMocks.request).toHaveBeenNthCalledWith(
|
||||
2,
|
||||
"voicecall.dtmf",
|
||||
{
|
||||
callId: "call-1",
|
||||
digits: "123456#",
|
||||
},
|
||||
{ timeoutMs: 30_000 },
|
||||
);
|
||||
expect(gatewayMocks.request).toHaveBeenNthCalledWith(
|
||||
3,
|
||||
"voicecall.speak",
|
||||
{
|
||||
callId: "call-1",
|
||||
message: "Say exactly: I'm here and listening.",
|
||||
},
|
||||
{ timeoutMs: 30_000 },
|
||||
);
|
||||
expect(gatewayMocks.request).toHaveBeenCalledTimes(3);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -18,12 +18,24 @@ type VoiceCallSpeakResult = {
|
||||
error?: string;
|
||||
};
|
||||
|
||||
/**
 * Shape the `voicecall.dtmf` gateway response is cast to.
 * The join flow treats the request as failed only when `success` is
 * explicitly `false`; `error`, when present, becomes the thrown message.
 */
type VoiceCallDtmfResult = {
|
||||
success?: boolean;
|
||||
error?: string;
|
||||
};
|
||||
|
||||
/**
 * Outcome of a Meet join delegated to the Voice Call gateway.
 * Matches the object returned at the end of `joinMeetViaVoiceCallGateway`
 * (NOTE(review): presumably its declared return type; the signature is not
 * visible in this hunk, so confirm).
 */
type VoiceCallMeetJoinResult = {
|
||||
// Call id returned by the `voicecall.start` request.
callId: string;
|
||||
// Whether the DTMF step ran (a sequence was configured and the request did not report failure).
dtmfSent: boolean;
|
||||
// Whether the intro speech step ran (a message was provided and the request did not report failure).
introSent: boolean;
|
||||
};
|
||||
|
||||
/**
 * Returns a promise that resolves after `ms` milliseconds.
 *
 * @param ms Delay in milliseconds; zero or negative values resolve without
 *   scheduling a timer.
 * @returns A promise that resolves with no value once the delay has elapsed.
 */
function sleep(ms: number): Promise<void> {
|
||||
// Non-positive delay: skip setTimeout entirely.
if (ms <= 0) {
|
||||
return Promise.resolve();
|
||||
}
|
||||
return new Promise((resolve) => setTimeout(resolve, ms));
|
||||
}
|
||||
|
||||
async function createConnectedGatewayClient(
|
||||
config: GoogleMeetConfig,
|
||||
): Promise<VoiceCallGatewayClient> {
|
||||
@@ -81,15 +93,13 @@ export async function joinMeetViaVoiceCallGateway(params: {
|
||||
try {
|
||||
client = await createConnectedGatewayClient(params.config);
|
||||
params.logger?.info(
|
||||
`[google-meet] Delegating Twilio join to Voice Call (dtmf=${params.dtmfSequence ? "yes" : "no"}, intro=${params.message ? "yes" : "no"})`,
|
||||
`[google-meet] Delegating Twilio join to Voice Call (dtmf=${params.dtmfSequence ? "post-connect" : "none"}, intro=${params.message ? "delayed" : "none"})`,
|
||||
);
|
||||
const start = (await client.request(
|
||||
"voicecall.start",
|
||||
{
|
||||
to: params.dialInNumber,
|
||||
mode: "conversation",
|
||||
...(params.message ? { message: params.message } : {}),
|
||||
...(params.dtmfSequence ? { dtmfSequence: params.dtmfSequence } : {}),
|
||||
},
|
||||
{ timeoutMs: params.config.voiceCall.requestTimeoutMs },
|
||||
)) as VoiceCallStartResult;
|
||||
@@ -97,12 +107,60 @@ export async function joinMeetViaVoiceCallGateway(params: {
|
||||
throw new Error(start.error || "voicecall.start did not return callId");
|
||||
}
|
||||
params.logger?.info(
|
||||
`[google-meet] Voice Call Twilio join started: callId=${start.callId} dtmf=${params.dtmfSequence ? "yes" : "no"} intro=${params.message ? "yes" : "no"}`,
|
||||
`[google-meet] Voice Call Twilio phone leg started: callId=${start.callId}`,
|
||||
);
|
||||
let dtmfSent = false;
|
||||
if (params.dtmfSequence) {
|
||||
const delayMs = params.config.voiceCall.dtmfDelayMs;
|
||||
params.logger?.info(
|
||||
`[google-meet] Waiting ${delayMs}ms before sending Meet DTMF for callId=${start.callId}`,
|
||||
);
|
||||
await sleep(delayMs);
|
||||
const dtmf = (await client.request(
|
||||
"voicecall.dtmf",
|
||||
{
|
||||
callId: start.callId,
|
||||
digits: params.dtmfSequence,
|
||||
},
|
||||
{ timeoutMs: params.config.voiceCall.requestTimeoutMs },
|
||||
)) as VoiceCallDtmfResult;
|
||||
if (dtmf.success === false) {
|
||||
throw new Error(dtmf.error || "voicecall.dtmf failed");
|
||||
}
|
||||
dtmfSent = true;
|
||||
params.logger?.info(
|
||||
`[google-meet] Meet DTMF sent after phone leg connected: callId=${start.callId} digits=${params.dtmfSequence.length}`,
|
||||
);
|
||||
}
|
||||
let introSent = false;
|
||||
if (params.message) {
|
||||
const delayMs = params.dtmfSequence ? params.config.voiceCall.postDtmfSpeechDelayMs : 0;
|
||||
if (delayMs > 0) {
|
||||
params.logger?.info(
|
||||
`[google-meet] Waiting ${delayMs}ms after Meet DTMF before speaking intro for callId=${start.callId}`,
|
||||
);
|
||||
await sleep(delayMs);
|
||||
}
|
||||
const spoken = (await client.request(
|
||||
"voicecall.speak",
|
||||
{
|
||||
callId: start.callId,
|
||||
message: params.message,
|
||||
},
|
||||
{ timeoutMs: params.config.voiceCall.requestTimeoutMs },
|
||||
)) as VoiceCallSpeakResult;
|
||||
if (spoken.success === false) {
|
||||
throw new Error(spoken.error || "voicecall.speak failed");
|
||||
}
|
||||
introSent = true;
|
||||
params.logger?.info(
|
||||
`[google-meet] Intro speech requested after Meet dial sequence: callId=${start.callId}`,
|
||||
);
|
||||
}
|
||||
return {
|
||||
callId: start.callId,
|
||||
dtmfSent: Boolean(params.dtmfSequence),
|
||||
introSent: Boolean(params.message),
|
||||
dtmfSent,
|
||||
introSent,
|
||||
};
|
||||
} finally {
|
||||
await client?.stopAndWait({ timeoutMs: 1_000 });
|
||||
|
||||
@@ -6,6 +6,7 @@ import { createTestPluginApi } from "openclaw/plugin-sdk/plugin-test-api";
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import type { OpenClawPluginApi } from "./api.js";
|
||||
import type { VoiceCallRuntime } from "./runtime-entry.js";
|
||||
import type { CallRecord } from "./src/types.js";
|
||||
|
||||
let runtimeStub: VoiceCallRuntime;
|
||||
|
||||
@@ -52,8 +53,12 @@ function captureStdout() {
|
||||
}
|
||||
|
||||
function createRuntimeStub(callId = "call-1"): VoiceCallRuntime {
|
||||
const call = createCallRecord({ callId });
|
||||
return {
|
||||
config: { toNumber: "+15550001234" } as VoiceCallRuntime["config"],
|
||||
config: {
|
||||
toNumber: "+15550001234",
|
||||
realtime: { enabled: false },
|
||||
} as VoiceCallRuntime["config"],
|
||||
provider: {} as VoiceCallRuntime["provider"],
|
||||
manager: {
|
||||
initiateCall: vi.fn(async () => ({ callId, success: true })),
|
||||
@@ -64,17 +69,35 @@ function createRuntimeStub(callId = "call-1"): VoiceCallRuntime {
|
||||
speak: vi.fn(async () => ({ success: true })),
|
||||
sendDtmf: vi.fn(async () => ({ success: true })),
|
||||
endCall: vi.fn(async () => ({ success: true })),
|
||||
getCall: vi.fn((id: string) => (id === callId ? { callId } : undefined)),
|
||||
getCall: vi.fn((id: string) => (id === callId ? call : undefined)),
|
||||
getCallByProviderCallId: vi.fn(() => undefined),
|
||||
getActiveCalls: vi.fn(() => [{ callId }]),
|
||||
getActiveCalls: vi.fn(() => [call]),
|
||||
getCallHistory: vi.fn(async () => []),
|
||||
} as unknown as VoiceCallRuntime["manager"],
|
||||
webhookServer: {} as VoiceCallRuntime["webhookServer"],
|
||||
webhookServer: {
|
||||
speakRealtime: vi.fn(() => ({ success: false, error: "No active realtime bridge for call" })),
|
||||
} as unknown as VoiceCallRuntime["webhookServer"],
|
||||
webhookUrl: "http://127.0.0.1:3334/voice/webhook",
|
||||
publicUrl: null,
|
||||
stop: vi.fn(async () => {}),
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Test fixture: builds a `CallRecord` with fixed defaults (an active,
 * outbound call on the "mock" provider with empty transcript and event list).
 *
 * @param overrides Fields that replace the defaults; spread last, so any key
 *   present here wins.
 * @returns The default record merged with `overrides`.
 */
function createCallRecord(overrides: Partial<CallRecord> = {}): CallRecord {
|
||||
return {
|
||||
callId: "call-1",
|
||||
provider: "mock",
|
||||
direction: "outbound",
|
||||
state: "active",
|
||||
from: "+15550001111",
|
||||
to: "+15550001234",
|
||||
// 2026-05-02T09:00:00Z as epoch milliseconds (Date.UTC months are zero-based, so 4 = May).
startedAt: Date.UTC(2026, 4, 2, 9, 0, 0),
|
||||
transcript: [],
|
||||
processedEventIds: [],
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
function createServiceContext(): Parameters<NonNullable<Registered["service"]>["start"]>[0] {
|
||||
return {
|
||||
config: {},
|
||||
@@ -397,6 +420,60 @@ describe("voice-call plugin", () => {
|
||||
expect(respond.mock.calls[0]).toEqual([true, { success: true }]);
|
||||
});
|
||||
|
||||
// Verifies that `voicecall.speak` accepts a provider call id ("CA123"):
// the direct lookup misses, the provider-id lookup returns the record, and
// the manager is asked to speak using the internal call id ("call-1").
it("normalizes provider call ids before speaking", async () => {
|
||||
runtimeStub.manager.getCall = vi.fn(() => undefined);
|
||||
runtimeStub.manager.getCallByProviderCallId = vi.fn(() =>
|
||||
createCallRecord({
|
||||
callId: "call-1",
|
||||
providerCallId: "CA123",
|
||||
}),
|
||||
);
|
||||
const { methods } = setup({ provider: "mock" });
|
||||
const handler = methods.get("voicecall.speak") as
|
||||
| ((ctx: {
|
||||
params: Record<string, unknown>;
|
||||
respond: ReturnType<typeof vi.fn>;
|
||||
}) => Promise<void>)
|
||||
| undefined;
|
||||
const respond = vi.fn();
|
||||
|
||||
await handler?.({ params: { callId: "CA123", message: "hello" }, respond });
|
||||
|
||||
expect(runtimeStub.manager.speak).toHaveBeenCalledWith("call-1", "hello");
|
||||
expect(respond.mock.calls[0]).toEqual([true, { success: true }]);
|
||||
});
|
||||
|
||||
// Verifies the stale-call path of `voicecall.speak`: neither active lookup
// finds the call, but call history contains a completed record with a
// matching provider call id, so the handler must respond with an error that
// describes the ended call and must not attempt to speak.
it("reports ended call history when speaking to a stale call", async () => {
|
||||
runtimeStub.manager.getCall = vi.fn(() => undefined);
|
||||
runtimeStub.manager.getCallByProviderCallId = vi.fn(() => undefined);
|
||||
runtimeStub.manager.getCallHistory = vi.fn(async () => [
|
||||
createCallRecord({
|
||||
callId: "call-1",
|
||||
providerCallId: "CA123",
|
||||
state: "completed",
|
||||
endReason: "completed",
|
||||
endedAt: Date.UTC(2026, 4, 2, 9, 18, 23),
|
||||
}),
|
||||
]);
|
||||
const { methods } = setup({ provider: "mock" });
|
||||
const handler = methods.get("voicecall.speak") as
|
||||
| ((ctx: {
|
||||
params: Record<string, unknown>;
|
||||
respond: ReturnType<typeof vi.fn>;
|
||||
}) => Promise<void>)
|
||||
| undefined;
|
||||
const respond = vi.fn();
|
||||
|
||||
await handler?.({ params: { callId: "CA123", message: "hello" }, respond });
|
||||
|
||||
// respond is called as (ok, payload, error); only ok and error matter here.
const [ok, , error] = respond.mock.calls[0] ?? [];
|
||||
expect(ok).toBe(false);
|
||||
expect(error.message).toContain("call is not active");
|
||||
expect(error.message).toContain("last state=completed");
|
||||
expect(error.message).toContain("endReason=completed");
|
||||
expect(runtimeStub.manager.speak).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("normalizes legacy config through runtime creation and warns to run doctor", async () => {
|
||||
const { methods } = setup({
|
||||
enabled: true,
|
||||
|
||||
@@ -302,6 +302,22 @@ export default definePluginEntry({
|
||||
respondError(respond, formatErrorMessage(err));
|
||||
};
|
||||
|
||||
/**
 * Builds a "call is not active (...)" description for a call id that is
 * found in call history, so callers can report the ended state.
 *
 * @param rt Voice Call runtime whose manager supplies the call history.
 * @param callId Internal call id or provider call id to look for.
 * @returns The description string, or `undefined` when no history record
 *   matches either id.
 */
const describeHistoricalCall = async (rt: VoiceCallRuntime, callId: string) => {
|
||||
// NOTE(review): 100 is presumably a cap on the number of history records fetched — confirm against getCallHistory.
const history = await rt.manager.getCallHistory(100);
|
||||
// Search a reversed copy so the last matching record in `history` wins
// (presumably the most recent one, if history is ordered oldest-first — confirm).
const call = history
|
||||
.toReversed()
|
||||
.find((candidate) => candidate.callId === callId || candidate.providerCallId === callId);
|
||||
if (!call) {
|
||||
return undefined;
|
||||
}
|
||||
// endReason and endedAt are included only when set; filter(Boolean) drops the undefined placeholders.
const details = [
|
||||
`last state=${call.state}`,
|
||||
call.endReason ? `endReason=${call.endReason}` : undefined,
|
||||
call.endedAt ? `endedAt=${new Date(call.endedAt).toISOString()}` : undefined,
|
||||
].filter(Boolean);
|
||||
return `call is not active (${details.join(", ")})`;
|
||||
};
|
||||
|
||||
const resolveCallMessageRequest = async (params: GatewayRequestHandlerOptions["params"]) => {
|
||||
const callId = normalizeOptionalString(params?.callId) ?? "";
|
||||
const message = normalizeOptionalString(params?.message) ?? "";
|
||||
@@ -309,7 +325,11 @@ export default definePluginEntry({
|
||||
return { error: "callId and message required" } as const;
|
||||
}
|
||||
const rt = await ensureRuntime();
|
||||
return { rt, callId, message } as const;
|
||||
const activeCall = rt.manager.getCall(callId) ?? rt.manager.getCallByProviderCallId(callId);
|
||||
if (activeCall) {
|
||||
return { rt, callId: activeCall.callId, message } as const;
|
||||
}
|
||||
return { error: (await describeHistoricalCall(rt, callId)) ?? "Call not found" } as const;
|
||||
};
|
||||
|
||||
const initiateCallAndRespond = async (params: {
|
||||
|
||||
Reference in New Issue
Block a user