feat(voice-call): improve realtime Meet voice agent

* feat(voice-call): inject agent context into realtime voice

* fix(voice-call): stabilize realtime meet audio

* fix(voice-call): delegate realtime consults to agent

* Improve realtime Meet voice consult routing

* Pin voice consult delivery to call session

* Move voice changelog entries to changes

* fix(voice-call): isolate final realtime transcripts

* test(voice-call): trim redundant realtime coverage
This commit is contained in:
scoootscooob
2026-05-05 12:56:31 -07:00
committed by GitHub
parent 782963ae66
commit 79dd65e208
35 changed files with 2088 additions and 137 deletions

View File

@@ -3,13 +3,16 @@ import type {
ProviderReplaySessionEntry,
ProviderSanitizeReplayHistoryContext,
} from "openclaw/plugin-sdk/plugin-entry";
import { createTestPluginApi } from "openclaw/plugin-sdk/plugin-test-api";
import {
registerProviderPlugin,
requireRegisteredProvider,
} from "openclaw/plugin-sdk/plugin-test-runtime";
import { createCapturedThinkingConfigStream } from "openclaw/plugin-sdk/provider-test-contracts";
import type { RealtimeVoiceProviderPlugin } from "openclaw/plugin-sdk/realtime-voice";
import { describe, expect, it } from "vitest";
import { registerGoogleGeminiCliProvider } from "./gemini-cli-provider.js";
import googlePlugin from "./index.js";
import { registerGoogleProvider } from "./provider-registration.js";
const googleProviderPlugin = {
@@ -226,4 +229,26 @@ describe("google provider plugin hooks", () => {
expect(googleProvider.buildReplayPolicy).toBe(cliProvider.buildReplayPolicy);
expect(googleProvider.wrapStreamFn).toBe(cliProvider.wrapStreamFn);
});
// Exercises the realtime voice bridge returned by the Google plugin before connect() is
// ever called: audio, timestamp, and user-message calls must be accepted without throwing.
it("buffers early realtime audio while the lazy Google bridge loads", () => {
let realtimeProvider: RealtimeVoiceProviderPlugin | undefined;
// Register the plugin against a test API that captures the realtime voice provider.
googlePlugin.register(
createTestPluginApi({
registerRealtimeVoiceProvider(provider) {
realtimeProvider = provider;
},
}),
);
const bridge = realtimeProvider?.createBridge({
providerConfig: { apiKey: "gemini-key" },
onAudio() {},
onClearAudio() {},
});
expect(bridge).toBeDefined();
// No connect() has been issued here, so these calls hit the bridge in its pre-connect state.
// NOTE(review): presumably this is the lazy wrapper whose real bridge has not loaded yet — confirm.
expect(() => bridge?.sendAudio(Buffer.alloc(160))).not.toThrow();
expect(() => bridge?.setMediaTimestamp(20)).not.toThrow();
expect(() => bridge?.sendUserMessage?.("hello")).not.toThrow();
});
});

View File

@@ -200,11 +200,18 @@ function resolveGoogleRealtimeEnvApiKey(): string | undefined {
);
}
// Cap on audio chunks queued by the lazy bridge before the real bridge exists; once the
// queue is full, the oldest chunk is dropped to make room for the newest one.
const GOOGLE_REALTIME_LAZY_MAX_PENDING_AUDIO_CHUNKS = 320;
function createLazyGoogleRealtimeVoiceBridge(
req: RealtimeVoiceBridgeCreateRequest,
): RealtimeVoiceBridge {
let bridge: RealtimeVoiceBridge | undefined;
let bridgePromise: Promise<RealtimeVoiceBridge> | undefined;
let closed = false;
let latestMediaTimestamp: number | undefined;
let pendingGreeting: string | undefined;
const pendingAudio: Buffer[] = [];
const pendingUserMessages: string[] = [];
const loadBridge = async () => {
if (!bridgePromise) {
bridgePromise = loadGoogleRealtimeVoiceProvider().then((provider) =>
@@ -220,20 +227,78 @@ function createLazyGoogleRealtimeVoiceBridge(
}
return bridge;
};
/**
 * Replays the input recorded while no bridge was available onto `loadedBridge`.
 *
 * Order is fixed: latest media timestamp, then queued audio, then queued user
 * messages, and finally the pending greeting. Each queue is emptied as it is
 * replayed, so a second call has nothing left to resend.
 */
const flushPending = (loadedBridge: RealtimeVoiceBridge) => {
  const timestamp = latestMediaTimestamp;
  if (typeof timestamp === "number") {
    loadedBridge.setMediaTimestamp(timestamp);
  }

  // splice(0) empties the shared queue and hands back its previous contents.
  const queuedAudio = pendingAudio.splice(0);
  queuedAudio.forEach((chunk) => {
    loadedBridge.sendAudio(chunk);
  });

  const queuedMessages = pendingUserMessages.splice(0);
  queuedMessages.forEach((message) => {
    loadedBridge.sendUserMessage?.(message);
  });

  const greeting = pendingGreeting;
  if (greeting === undefined) {
    return;
  }
  // Clear the slot before triggering so the greeting cannot be replayed twice.
  pendingGreeting = undefined;
  loadedBridge.triggerGreeting?.(greeting);
};
return {
supportsToolResultContinuation: true,
connect: async () => {
await (await loadBridge()).connect();
const loadedBridge = await loadBridge();
if (closed) {
loadedBridge.close();
return;
}
await loadedBridge.connect();
flushPending(loadedBridge);
},
sendAudio: (audio) => {
if (bridge) {
bridge.sendAudio(audio);
return;
}
if (!closed) {
if (pendingAudio.length >= GOOGLE_REALTIME_LAZY_MAX_PENDING_AUDIO_CHUNKS) {
pendingAudio.shift();
}
pendingAudio.push(audio);
}
},
setMediaTimestamp: (ts) => {
latestMediaTimestamp = ts;
bridge?.setMediaTimestamp(ts);
},
sendUserMessage: (text) => {
if (bridge) {
bridge.sendUserMessage?.(text);
return;
}
if (!closed) {
pendingUserMessages.push(text);
}
},
triggerGreeting: (instructions) => {
if (bridge) {
bridge.triggerGreeting?.(instructions);
return;
}
if (!closed) {
pendingGreeting = instructions;
}
},
sendAudio: (audio) => requireBridge().sendAudio(audio),
setMediaTimestamp: (ts) => requireBridge().setMediaTimestamp(ts),
sendUserMessage: (text) => requireBridge().sendUserMessage?.(text),
triggerGreeting: (instructions) => requireBridge().triggerGreeting?.(instructions),
handleBargeIn: (options) => requireBridge().handleBargeIn?.(options),
submitToolResult: (callId, result, options) =>
requireBridge().submitToolResult(callId, result, options),
acknowledgeMark: () => requireBridge().acknowledgeMark(),
close: () => bridge?.close(),
close: () => {
closed = true;
pendingAudio.length = 0;
pendingUserMessages.length = 0;
pendingGreeting = undefined;
bridge?.close();
},
isConnected: () => bridge?.isConnected() ?? false,
};
}

View File

@@ -16,7 +16,7 @@ type MockGoogleLiveConnectParams = {
onopen: () => void;
onmessage: (message: Record<string, unknown>) => void;
onerror: (event: { error?: unknown; message?: string }) => void;
onclose: () => void;
onclose: (event?: { code?: number; reason?: string; wasClean?: boolean }) => void;
};
};
@@ -352,6 +352,47 @@ describe("buildGoogleRealtimeVoiceProvider", () => {
expect(lastConnectParams().config.sessionResumption).toEqual({ handle: "resume-1" });
});
// Verifies that an unexpected close triggers a delayed reconnect that reuses the most
// recent session resumption handle instead of reporting the call as closed.
it("reconnects unexpected Google Live closes with the latest resumption handle", async () => {
// Fake timers let the test step over the reconnect delay deterministically.
vi.useFakeTimers();
try {
const provider = buildGoogleRealtimeVoiceProvider();
const onClose = vi.fn();
const onError = vi.fn();
const bridge = provider.createBridge({
providerConfig: { apiKey: "gemini-key" },
onAudio: vi.fn(),
onClearAudio: vi.fn(),
onClose,
onError,
});
await bridge.connect();
// Simulate the server finishing setup and handing out a resumption handle.
lastConnectParams().callbacks.onmessage({
setupComplete: { sessionId: "session-1" },
sessionResumptionUpdate: { resumable: true, newHandle: "resume-1" },
});
// Simulate an unexpected, unclean close coming from the Live session.
lastConnectParams().callbacks.onclose({
code: 1011,
reason: "temporary upstream close",
wasClean: false,
});
// The close must be reported as a reconnect notice, not as a terminal close.
expect(onClose).not.toHaveBeenCalled();
expect(onError).toHaveBeenCalledWith(
expect.objectContaining({
message: expect.stringContaining("reconnecting 1/3"),
}),
);
// 250 ms is the delay before the first reconnect attempt.
await vi.advanceTimersByTimeAsync(250);
expect(connectMock).toHaveBeenCalledTimes(2);
expect(lastConnectParams().config.sessionResumption).toEqual({ handle: "resume-1" });
} finally {
vi.useRealTimers();
}
});
it("waits for setup completion before draining audio and firing ready", async () => {
const provider = buildGoogleRealtimeVoiceProvider();
const onReady = vi.fn();

View File

@@ -50,6 +50,9 @@ const MAX_PENDING_AUDIO_CHUNKS = 320;
const DEFAULT_AUDIO_STREAM_END_SILENCE_MS = 500;
const GOOGLE_REALTIME_BROWSER_SESSION_TTL_MS = 30 * 60 * 1000;
const GOOGLE_REALTIME_BROWSER_NEW_SESSION_TTL_MS = 60 * 1000;
// Maximum reconnect attempts after an unexpected Google Live close; the attempt counter
// is reset once a session completes setup.
const GOOGLE_REALTIME_RECONNECT_MAX_ATTEMPTS = 3;
// Delay before the first reconnect attempt; it doubles with each further attempt.
const GOOGLE_REALTIME_RECONNECT_BASE_DELAY_MS = 250;
// Upper bound applied to the doubling reconnect delay.
const GOOGLE_REALTIME_RECONNECT_MAX_DELAY_MS = 2_000;
const MULAW_LINEAR_SAMPLES = new Int16Array(256);
for (let i = 0; i < MULAW_LINEAR_SAMPLES.length; i += 1) {
@@ -401,6 +404,24 @@ function isPcm16Silence(audio: Buffer): boolean {
return true;
}
/**
 * Renders a Google Live close event as a compact `key=value` string for
 * log and error messages.
 *
 * @param event - Close event details, or `undefined` when none were supplied.
 * @returns `code=<n|unknown> reason=<text|none>`, followed by ` clean=<bool>`
 *   only when `wasClean` is a boolean. A missing event yields
 *   `code=unknown reason=unknown`.
 */
function formatGoogleLiveCloseEvent(
  event: { code?: number; reason?: string; wasClean?: boolean } | undefined,
): string {
  if (!event) {
    return "code=unknown reason=unknown";
  }

  const { code, reason, wasClean } = event;
  const parts = [
    `code=${typeof code === "number" ? code : "unknown"}`,
    // Blank or whitespace-only reasons collapse to "none".
    `reason=${reason?.trim() || "none"}`,
  ];
  if (typeof wasClean === "boolean") {
    parts.push(`clean=${wasClean}`);
  }
  return parts.join(" ");
}
class GoogleRealtimeVoiceBridge implements RealtimeVoiceBridge {
readonly supportsToolResultContinuation = true;
@@ -415,6 +436,8 @@ class GoogleRealtimeVoiceBridge implements RealtimeVoiceBridge {
private pendingFunctionNames = new Map<string, string>();
private readonly audioFormat: RealtimeVoiceAudioFormat;
private resumptionHandle: string | undefined;
private reconnectAttempts = 0;
private reconnectTimer: ReturnType<typeof setTimeout> | undefined;
constructor(private readonly config: GoogleRealtimeVoiceBridgeConfig) {
this.audioFormat = config.audioFormat ?? REALTIME_VOICE_AUDIO_FORMAT_G711_ULAW_8KHZ;
@@ -464,13 +487,23 @@ class GoogleRealtimeVoiceBridge implements RealtimeVoiceBridge {
);
this.config.onError?.(error);
},
onclose: () => {
onclose: (event) => {
this.connected = false;
this.sessionConfigured = false;
this.pendingFunctionNames.clear();
const reason = this.intentionallyClosed ? "completed" : "error";
this.session = null;
this.config.onClose?.(reason);
if (this.intentionallyClosed) {
this.config.onClose?.("completed");
return;
}
const closeDetails = formatGoogleLiveCloseEvent(event);
if (this.scheduleReconnect(closeDetails)) {
return;
}
this.config.onError?.(
new Error(`Google Live session closed after reconnect attempts: ${closeDetails}`),
);
this.config.onClose?.("error");
},
},
})) as GoogleLiveSession;
@@ -596,6 +629,10 @@ class GoogleRealtimeVoiceBridge implements RealtimeVoiceBridge {
this.intentionallyClosed = true;
this.connected = false;
this.sessionConfigured = false;
if (this.reconnectTimer) {
clearTimeout(this.reconnectTimer);
this.reconnectTimer = undefined;
}
this.pendingAudio = [];
this.consecutiveSilenceMs = 0;
this.audioStreamEnded = false;
@@ -667,6 +704,7 @@ class GoogleRealtimeVoiceBridge implements RealtimeVoiceBridge {
private handleSetupComplete(): void {
this.sessionConfigured = true;
this.reconnectAttempts = 0;
for (const chunk of this.pendingAudio.splice(0)) {
this.sendAudio(chunk);
}
@@ -739,6 +777,36 @@ class GoogleRealtimeVoiceBridge implements RealtimeVoiceBridge {
});
}
}
/**
 * Schedules a delayed reconnect after an unexpected session close.
 *
 * The delay starts at GOOGLE_REALTIME_RECONNECT_BASE_DELAY_MS, doubles with each
 * attempt, and is capped at GOOGLE_REALTIME_RECONNECT_MAX_DELAY_MS. Each newly
 * scheduled attempt is announced through `config.onError` with the close details.
 *
 * @param closeDetails - Human-readable description of why the session ended.
 * @returns `true` when a reconnect is (or already was) scheduled; `false` when the
 *   attempt budget is exhausted and the caller should report a terminal close.
 */
private scheduleReconnect(closeDetails: string): boolean {
  if (this.reconnectTimer !== undefined) {
    // A reconnect is already queued. Scheduling another would overwrite the timer
    // handle, leaving an orphaned timer that close() cannot cancel and that could
    // start a second, parallel connect(). Treat the pending attempt as the answer.
    return true;
  }
  if (this.reconnectAttempts >= GOOGLE_REALTIME_RECONNECT_MAX_ATTEMPTS) {
    return false;
  }
  const attempt = ++this.reconnectAttempts;
  const delayMs = Math.min(
    GOOGLE_REALTIME_RECONNECT_MAX_DELAY_MS,
    GOOGLE_REALTIME_RECONNECT_BASE_DELAY_MS * 2 ** (attempt - 1),
  );
  this.config.onError?.(
    new Error(
      `Google Live session closed unexpectedly (${closeDetails}); reconnecting ${attempt}/${GOOGLE_REALTIME_RECONNECT_MAX_ATTEMPTS} in ${delayMs}ms`,
    ),
  );
  this.reconnectTimer = setTimeout(() => {
    this.reconnectTimer = undefined;
    if (this.intentionallyClosed) {
      return;
    }
    this.connect().catch((error: unknown) => {
      const failure = error instanceof Error ? error : new Error(String(error));
      this.config.onError?.(failure);
      if (this.intentionallyClosed) {
        // The bridge was closed while the reconnect was in flight: do not queue
        // another attempt or report an "error" close for a deliberate shutdown.
        return;
      }
      if (!this.scheduleReconnect(`connect failed: ${failure.message}`)) {
        this.config.onClose?.("error");
      }
    });
  }, delayMs);
  return true;
}
}
function convertMulaw8kToPcm16k(muLaw: Buffer): Buffer {