chore: log meet twilio voice flow

This commit is contained in:
Peter Steinberger
2026-05-01 07:38:31 +01:00
parent fc1c597dbf
commit c677861032
10 changed files with 65 additions and 5 deletions

View File

@@ -6,6 +6,7 @@ Docs: https://docs.openclaw.ai
### Changes
- Voice Call/Google Meet: add Twilio Meet join phase logs around pre-connect DTMF, realtime stream setup, and initial greeting handoff for easier live-call debugging. Thanks @donkeykong91 and @PfanP.
- Messages/docs: clarify that `BodyForAgent` is the primary inbound model text while `Body` is the legacy envelope fallback, and add Signal coverage so channel hardening patches target the real prompt path. Refs #66198. Thanks @defonota3box.
- Control UI/Usage: add UTC quarter-hour token buckets for the Usage Mosaic and reuse them for hour filtering, keeping the legacy session-span fallback for older summaries. (#74337) Thanks @konanok.
- BlueBubbles: add opt-in `channels.bluebubbles.replyContextApiFallback` that fetches the original message from the BlueBubbles HTTP API when the in-memory reply-context cache misses (multi-instance deployments sharing one BB account, post-restart, after long-lived TTL/LRU eviction). Off by default; channel-level setting propagates to accounts that omit the flag through `mergeAccountConfig`; routed through the typed `BlueBubblesClient` so every fetch is SSRF-guarded by the same three-mode policy as every other BB client request; reply-id shape is validated and part-index prefixes (`p:0/<guid>`) are stripped before the request; concurrent webhooks for the same `replyToId` coalesce into one fetch and successful responses populate the reply cache for subsequent hits. Also promotes BlueBubbles attachment download failures from verbose to runtime error so silently-dropped inbound images are visible at default log level, and extends `sanitizeForLog` to redact `?password=…`/`?token=…` query params and `Authorization:` headers before they reach the log sink (CWE-532). (#71820) Thanks @coletebou and @zqchris.

View File

@@ -1133,6 +1133,8 @@ Expected Twilio state:
`twilio-voice-call-credentials`, and `twilio-voice-call-webhook` checks.
- `voicecall` is available in the CLI after Gateway reload.
- The returned session has `transport: "twilio"` and a `twilio.voiceCallId`.
- `openclaw logs --follow` shows DTMF TwiML served before realtime TwiML, then the
realtime bridge starting with the initial greeting queued.
- `googlemeet leave <sessionId>` hangs up the delegated voice call.
## Troubleshooting
@@ -1407,6 +1409,10 @@ participant:
active.
- Run `openclaw voicecall tail` and check that Twilio webhooks are arriving at
the Gateway.
- Run `openclaw logs --follow` and look for the Twilio Meet sequence: Google
Meet delegates the join, Voice Call stores pre-connect DTMF TwiML, serves
that initial TwiML, then serves realtime TwiML and starts the realtime bridge
with `initialGreeting=queued`.
- Re-run `openclaw googlemeet setup --transport twilio`; a green setup check is
required but does not prove the meeting PIN sequence is correct.
- Confirm the dial-in number belongs to the same Meet invitation and region as
@@ -1414,9 +1420,9 @@ participant:
- Increase the leading pauses in `--dtmf-sequence` if Meet answers slowly, for
example `wwww123456#`.
- If the participant joins but you do not hear the greeting, check
`openclaw voicecall tail` for a Twilio stream start followed by realtime
provider readiness. The greeting is now generated from the initial
`voicecall.start` message after the stream connects.
`openclaw logs --follow` for realtime TwiML, realtime bridge startup, and
`initialGreeting=queued`. The greeting is generated from the initial
`voicecall.start` message after the realtime bridge connects.
If webhooks do not arrive, debug the Voice Call plugin first: the provider must
reach `plugins.entries.voice-call.config.publicUrl` or the configured tunnel.

View File

@@ -723,6 +723,7 @@ Then inspect runtime state:
```bash
openclaw voicecall status --call-id <id>
openclaw voicecall tail
openclaw logs --follow
```
Common causes:
@@ -775,6 +776,19 @@ For Twilio calls, Voice Call serves the DTMF TwiML first, redirects back to the
webhook, then opens the realtime media stream so the saved intro is generated
after the phone participant has joined the meeting.
Use `openclaw logs --follow` for the live phase trace. A healthy Twilio Meet
join logs these phases in this order:
- Google Meet delegates the Twilio join to Voice Call.
- Voice Call stores pre-connect DTMF TwiML.
- Twilio initial TwiML is consumed and served before realtime handling.
- Voice Call serves realtime TwiML for the Twilio call.
- The realtime bridge starts with the initial greeting queued.
`openclaw voicecall tail` still shows persisted call records; it is useful for
call state and transcripts, but not every webhook/realtime transition appears
there.
### Realtime call has no speech
Confirm only one audio mode is enabled. `realtime.enabled` and
@@ -785,8 +799,8 @@ For realtime Twilio calls, also verify:
- A realtime provider plugin is loaded and registered.
- `realtime.provider` is unset or names a registered provider.
- The provider API key is available to the Gateway process.
- `openclaw voicecall tail` shows the media stream accepted and realtime
provider readiness before the initial greeting.
- `openclaw logs --follow` shows realtime TwiML served, the realtime bridge
started, and the initial greeting queued.
## Related

View File

@@ -977,6 +977,7 @@ describe("google-meet plugin", () => {
config: expect.objectContaining({ defaultTransport: "twilio" }),
dialInNumber: "+15551234567",
dtmfSequence: "123456#",
logger: expect.objectContaining({ info: expect.any(Function) }),
message: "Say exactly: I'm here and listening.",
});
});

View File

@@ -403,6 +403,7 @@ export class GoogleMeetRuntime {
config: this.params.config,
dialInNumber,
dtmfSequence,
logger: this.params.logger,
message:
mode === "realtime"
? (request.message ??

View File

@@ -2,6 +2,7 @@ import {
GatewayClient,
startGatewayClientWhenEventLoopReady,
} from "openclaw/plugin-sdk/gateway-runtime";
import type { RuntimeLogger } from "openclaw/plugin-sdk/plugin-runtime";
import type { GoogleMeetConfig } from "./config.js";
type VoiceCallGatewayClient = InstanceType<typeof GatewayClient>;
@@ -72,12 +73,16 @@ export async function joinMeetViaVoiceCallGateway(params: {
config: GoogleMeetConfig;
dialInNumber: string;
dtmfSequence?: string;
logger?: RuntimeLogger;
message?: string;
}): Promise<VoiceCallMeetJoinResult> {
let client: VoiceCallGatewayClient | undefined;
try {
client = await createConnectedGatewayClient(params.config);
params.logger?.info(
`[google-meet] Delegating Twilio join to Voice Call (dtmf=${params.dtmfSequence ? "yes" : "no"}, intro=${params.message ? "yes" : "no"})`,
);
const start = (await client.request(
"voicecall.start",
{
@@ -91,6 +96,9 @@ export async function joinMeetViaVoiceCallGateway(params: {
if (!start.callId) {
throw new Error(start.error || "voicecall.start did not return callId");
}
params.logger?.info(
`[google-meet] Voice Call Twilio join started: callId=${start.callId} dtmf=${params.dtmfSequence ? "yes" : "no"} intro=${params.message ? "yes" : "no"}`,
);
return {
callId: start.callId,
dtmfSent: Boolean(params.dtmfSequence),

View File

@@ -185,6 +185,9 @@ export async function initiateCall(
console.log(`[voice-call] Using inline TwiML for notify mode (voice: ${pollyVoice})`);
} else if (dtmfSequence) {
preConnectTwiml = generateDtmfRedirectTwiml(dtmfSequence, ctx.webhookUrl);
console.log(
`[voice-call] Using pre-connect DTMF TwiML for call ${callId} (digits=${dtmfSequence.length}, initialMessage=${initialMessage ? "yes" : "no"})`,
);
}
const result = await ctx.provider.initiateCall({
@@ -199,6 +202,9 @@ export async function initiateCall(
callRecord.providerCallId = result.providerCallId;
ctx.providerCallIdMap.set(result.providerCallId, callId);
persistCallRecord(ctx.storePath, callRecord);
console.log(
`[voice-call] Outbound call initiated: callId=${callId} providerCallId=${result.providerCallId} mode=${mode} preConnectDtmf=${preConnectTwiml ? "yes" : "no"} initialMessage=${initialMessage ? "yes" : "no"}`,
);
return { callId, success: true };
} catch (err) {

View File

@@ -456,7 +456,11 @@ export class TwilioProvider implements VoiceCallProvider {
if (!storedTwiml) {
return null;
}
const kind = this.notifyCalls.has(view.callIdFromQuery) ? "notify" : "pre-connect";
this.deleteStoredTwiml(view.callIdFromQuery);
console.log(
`[voice-call] Twilio initial TwiML consumed for call ${view.callIdFromQuery} (kind=${kind}, callSid=${view.callSid ?? "unknown"})`,
);
return storedTwiml;
}
@@ -550,8 +554,14 @@ export class TwilioProvider implements VoiceCallProvider {
if (input.inlineTwiml) {
this.twimlStorage.set(input.callId, input.inlineTwiml);
this.notifyCalls.add(input.callId);
console.log(
`[voice-call] Stored Twilio initial TwiML for call ${input.callId} (kind=notify)`,
);
} else if (input.preConnectTwiml) {
this.twimlStorage.set(input.callId, input.preConnectTwiml);
console.log(
`[voice-call] Stored Twilio initial TwiML for call ${input.callId} (kind=pre-connect)`,
);
}
// Build request params - always use URL-based TwiML.

View File

@@ -674,6 +674,10 @@ export class VoiceCallWebhookServer {
const initialTwiML = this.provider.consumeInitialTwiML?.(ctx);
if (initialTwiML !== undefined && initialTwiML !== null) {
const params = new URLSearchParams(ctx.rawBody);
console.log(
`[voice-call] Serving provider initial TwiML before realtime handling (callSid=${params.get("CallSid") ?? "unknown"}, direction=${params.get("Direction") ?? "unknown"})`,
);
return {
statusCode: 200,
headers: { "Content-Type": "application/xml" },
@@ -689,6 +693,9 @@ export class VoiceCallWebhookServer {
console.log("[voice-call] Realtime inbound call rejected before stream setup");
return buildRealtimeRejectedTwiML();
}
console.log(
`[voice-call] Serving realtime TwiML for Twilio call ${realtimeParams.get("CallSid") ?? "unknown"} (direction=${direction ?? "unknown"})`,
);
return this.realtimeHandler!.buildTwiMLPayload(req, realtimeParams);
}

View File

@@ -258,6 +258,9 @@ export class RealtimeCallHandler {
}
const { callId, initialGreetingInstructions } = registration;
console.log(
`[voice-call] Realtime bridge starting for call ${callId} (providerCallId=${callSid}, initialGreeting=${initialGreetingInstructions ? "queued" : "absent"})`,
);
let callEndEmitted = false;
const emitCallEnd = (reason: "completed" | "error") => {
if (callEndEmitted) {
@@ -396,6 +399,9 @@ export class RealtimeCallHandler {
}
const initialGreeting = this.extractInitialGreeting(callRecord);
console.log(
`[voice-call] Realtime call ${callRecord.callId} initial greeting ${initialGreeting ? "queued" : "absent"}`,
);
if (callRecord.metadata) {
delete callRecord.metadata.initialMessage;
}